From f5cd36ad7296f03120fb689a2b6740dff3236514 Mon Sep 17 00:00:00 2001 From: Peng Zhou Date: Wed, 26 Dec 2018 09:59:26 +0800 Subject: [PATCH 01/58] ANDROID: f2fs: Complement "android_fs" tracepoint of read path It's only in DIO before, complement for BIO. Bug: 120445624 Change-Id: I90b6fb15e355978da8805ed6306c595819be989d Signed-off-by: Peng Zhou --- fs/f2fs/data.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7c42cfe6365..6552b4875327 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -143,6 +143,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_READ_IO)) { f2fs_show_injection_info(FAULT_READ_IO); @@ -157,6 +159,13 @@ static void f2fs_read_end_io(struct bio *bio) return; } + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + __read_end_io(bio); } @@ -324,6 +333,32 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, submit_bio(bio); } +static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + if (trace_android_fs_dataread_start_enabled() && (type == DATA)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL && + __read_io_type(first_page) == F2FS_RD_DATA) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -471,7 +506,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) inc_page_count(fio->sbi, is_read_io(fio->op) ? __read_io_type(page): WB_DATA_TYPE(fio->page)); - __submit_bio(fio->sbi, bio, fio->type); + __f2fs_submit_read_bio(fio->sbi, bio, fio->type); return 0; } @@ -601,7 +636,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1601,7 +1636,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (bio && (last_block_in_bio != block_nr - 1 || !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { @@ -1633,7 +1668,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } unlock_page(page); @@ -1643,7 +1678,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), bio, DATA); + __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return 0; } From 89ce7df8cf98279218a2f02628bd4b7c4c2e968f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 14 Jan 2019 17:24:18 -0700 Subject: [PATCH 02/58] Makefile: Fix 4.19.15 resolution Commit 1637d5d2e299 ("kbuild: consolidate Clang compiler flags") conflicts with commit 4c451dba25d2 ("ANDROID: Kbuild, LLVMLinux: allow overriding clang target triple"). As it currently stands, CLANG_FLAGS will not have the proper target parameter when cross compiling for aarch64-linux-android. Move the CLANG_FLAGS definition up and use it when checking for the proper target fallback, allowing us to properly remove CLANG_TARGET. Change-Id: I176146ad613bd5e6187f3421f29c2ffcc9ceebc3 Signed-off-by: Nathan Chancellor --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1d06385038d9..a9230bccd043 100644 --- a/Makefile +++ b/Makefile @@ -483,11 +483,10 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) CLANG_TRIPLE ?= $(CROSS_COMPILE) -CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%)) -ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_TARGET)), y) +CLANG_FLAGS := --target=$(notdir $(CLANG_TRIPLE:%-=%)) +ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y) $(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") endif -CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) From 9133e3486a30373400528ea3a253d076eb9c808e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 15 Jan 2019 16:35:27 -0800 Subject: [PATCH 03/58] ANDROID: cuttlefish: enable CONFIG_NET_CLS_BPF=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to mirror https://android-review.googlesource.com/c/kernel/configs/+/870517 android-4.9+: add CONFIG_NET_CLS_BPF to base Generated via: echo 'CONFIG_NET_CLS_BPF=y' >> arch/x86/configs/x86_64_cuttlefish_defconfig echo 'CONFIG_NET_CLS_BPF=y' >> arch/arm64/configs/cuttlefish_defconfig make ARCH=x86_64 x86_64_cuttlefish_defconfig make ARCH=x86_64 savedefconfig cat defconfig > arch/x86/configs/x86_64_cuttlefish_defconfig make ARCH=arm64 cuttlefish_defconfig make ARCH=arm64 savedefconfig cat defconfig > arch/arm64/configs/cuttlefish_defconfig Bug: 65674744 Change-Id: I8e4dfe7a99d38fd5942001a1aab83b7ac9df30dd Signed-off-by: Maciej Żenczykowski --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index b46a67f8c2e9..0b87fc685e8d 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -178,6 +178,7 @@ CONFIG_L2TP=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=y CONFIG_NET_CLS_U32=y +CONFIG_NET_CLS_BPF=y CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_U32=y CONFIG_NET_CLS_ACT=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 90d73c393f91..f4544e8fec1b 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -183,6 +183,7 @@ CONFIG_IP6_NF_RAW=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=y CONFIG_NET_CLS_U32=y +CONFIG_NET_CLS_BPF=y CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_U32=y CONFIG_NET_CLS_ACT=y From 3a49374afc8bb33531e24a2d2f109f69e4b2d9c8 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 16 Jan 2019 09:42:37 -0500 Subject: [PATCH 04/58] Revert "UPSTREAM: mm/memfd: make F_SEAL_FUTURE_WRITE seal more robust" This reverts commit 2e0d7ea44a5db08b00dc0a0548ee1f668b15da48. Change-Id: Id8ca9575c75db3eeb06b7aa7217a59c85e55d0ac --- fs/hugetlbfs/inode.c | 2 +- mm/memfd.c | 19 +++++++++++++++++++ mm/shmem.c | 24 +++--------------------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1978581abfdf..32920a10100e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) inode_lock(inode); /* protected by i_mutex */ - if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { + if (info->seals & F_SEAL_WRITE) { inode_unlock(inode); return -EPERM; } diff --git a/mm/memfd.c b/mm/memfd.c index a9ece5fab439..5ba9804e9515 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -220,6 +220,25 @@ static int memfd_add_seals(struct file *file, unsigned int seals) } } + if ((seals & F_SEAL_FUTURE_WRITE) && + !(*file_seals & F_SEAL_FUTURE_WRITE)) { + /* + * The FUTURE_WRITE seal also prevents growing and shrinking + * so we need them to be already set, or requested now. + */ + int test_seals = (seals | *file_seals) & + (F_SEAL_GROW | F_SEAL_SHRINK); + + if (test_seals != (F_SEAL_GROW | F_SEAL_SHRINK)) { + error = -EINVAL; + goto unlock; + } + + spin_lock(&file->f_lock); + file->f_mode &= ~(FMODE_WRITE | FMODE_PWRITE); + spin_unlock(&file->f_lock); + } + *file_seals |= seals; error = 0; diff --git a/mm/shmem.c b/mm/shmem.c index e9949350743e..b6cf0e8e685b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2169,23 +2169,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { - struct shmem_inode_info *info = SHMEM_I(file_inode(file)); - - /* - * New PROT_READ and MAP_SHARED mmaps are not allowed when "future - * write" seal active. - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE) && - (info->seals & F_SEAL_FUTURE_WRITE)) - return -EPERM; - - /* - * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED read-only - * mapping, take care to not allow mprotect to revert protections. - */ - if (info->seals & F_SEAL_FUTURE_WRITE) - vma->vm_flags &= ~(VM_MAYWRITE); - file_accessed(file); vma->vm_ops = &shmem_vm_ops; if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && @@ -2439,9 +2422,8 @@ shmem_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; /* i_mutex is held by caller */ - if (unlikely(info->seals & (F_SEAL_GROW | - F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { - if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) + if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) { + if (info->seals & F_SEAL_WRITE) return -EPERM; if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) return -EPERM; @@ -2704,7 +2686,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); /* protected by i_mutex */ - if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { + if (info->seals & F_SEAL_WRITE) { error = -EPERM; goto out; } From 198aac25b7ce56e879e3a82967aec43015d632b0 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 16 Jan 2019 09:42:39 -0500 Subject: [PATCH 05/58] Revert "UPSTREAM: mm: Add an F_SEAL_FUTURE_WRITE seal to memfd" This reverts commit 1dc8ca44292047348dc291607a558e01331e0891. Change-Id: I513034073c278e2ae58a53352cc2553a256a7ee0 --- include/uapi/linux/fcntl.h | 1 - mm/memfd.c | 22 +--------------------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a2f8658f1c55..6448cdd9a350 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -41,7 +41,6 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ -#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ /* (1U << 31) is reserved for signed error codes */ /* diff --git a/mm/memfd.c b/mm/memfd.c index 5ba9804e9515..2bb5e257080e 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -150,8 +150,7 @@ static unsigned int *memfd_file_seals_ptr(struct file *file) #define F_ALL_SEALS (F_SEAL_SEAL | \ F_SEAL_SHRINK | \ F_SEAL_GROW | \ - F_SEAL_WRITE | \ - F_SEAL_FUTURE_WRITE) + F_SEAL_WRITE) static int memfd_add_seals(struct file *file, unsigned int seals) { @@ -220,25 +219,6 @@ static int memfd_add_seals(struct file *file, unsigned int seals) } } - if ((seals & F_SEAL_FUTURE_WRITE) && - !(*file_seals & F_SEAL_FUTURE_WRITE)) { - /* - * The FUTURE_WRITE seal also prevents growing and shrinking - * so we need them to be already set, or requested now. - */ - int test_seals = (seals | *file_seals) & - (F_SEAL_GROW | F_SEAL_SHRINK); - - if (test_seals != (F_SEAL_GROW | F_SEAL_SHRINK)) { - error = -EINVAL; - goto unlock; - } - - spin_lock(&file->f_lock); - file->f_mode &= ~(FMODE_WRITE | FMODE_PWRITE); - spin_unlock(&file->f_lock); - } - *file_seals |= seals; error = 0; From dec031f64072325c4dc5b57df9b9e2b2838a23ff Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 12 Jan 2019 15:38:15 -0500 Subject: [PATCH 06/58] UPSTREAM: mm/memfd: Add an F_SEAL_FUTURE_WRITE seal to memfd Android uses ashmem for sharing memory regions. We are looking forward to migrating all usecases of ashmem to memfd so that we can possibly remove the ashmem driver in the future from staging while also benefiting from using memfd and contributing to it. Note staging drivers are also not ABI and generally can be removed at anytime. One of the main usecases Android has is the ability to create a region and mmap it as writeable, then add protection against making any "future" writes while keeping the existing already mmap'ed writeable-region active. This allows us to implement a usecase where receivers of the shared memory buffer can get a read-only view, while the sender continues to write to the buffer. See CursorWindow documentation in Android for more details: https://developer.android.com/reference/android/database/CursorWindow This usecase cannot be implemented with the existing F_SEAL_WRITE seal. To support the usecase, this patch adds a new F_SEAL_FUTURE_WRITE seal which prevents any future mmap and write syscalls from succeeding while keeping the existing mmap active. A better way to do F_SEAL_FUTURE_WRITE seal was discussed [1] last week where we don't need to modify core VFS structures to get the same behavior of the seal. This solves several side-effects pointed by Andy. self-tests are provided in later patch to verify the expected semantics. [1] https://lore.kernel.org/lkml/20181111173650.GA256781@google.com/ [Thanks a lot to Andy for suggestions to improve code] Cc: Andy Lutomirski Signed-off-by: Joel Fernandes (Google) Acked-by: John Stultz Change-Id: I6710c045954378f87bfbff6311d372a3b8549064 --- fs/hugetlbfs/inode.c | 2 +- include/uapi/linux/fcntl.h | 1 + mm/memfd.c | 3 ++- mm/shmem.c | 25 ++++++++++++++++++++++--- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 32920a10100e..1978581abfdf 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) inode_lock(inode); /* protected by i_mutex */ - if (info->seals & F_SEAL_WRITE) { + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { inode_unlock(inode); return -EPERM; } diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6448cdd9a350..a2f8658f1c55 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ /* (1U << 31) is reserved for signed error codes */ /* diff --git a/mm/memfd.c b/mm/memfd.c index 2bb5e257080e..a9ece5fab439 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -150,7 +150,8 @@ static unsigned int *memfd_file_seals_ptr(struct file *file) #define F_ALL_SEALS (F_SEAL_SEAL | \ F_SEAL_SHRINK | \ F_SEAL_GROW | \ - F_SEAL_WRITE) + F_SEAL_WRITE | \ + F_SEAL_FUTURE_WRITE) static int memfd_add_seals(struct file *file, unsigned int seals) { diff --git a/mm/shmem.c b/mm/shmem.c index b6cf0e8e685b..645885c0fe6f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2169,6 +2169,24 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { + struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + + if (info->seals & F_SEAL_FUTURE_WRITE) { + /* + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when + * "future write" seal active. + */ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) + return -EPERM; + + /* + * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED + * read-only mapping, take care to not allow mprotect to revert + * protections. + */ + vma->vm_flags &= ~(VM_MAYWRITE); + } + file_accessed(file); vma->vm_ops = &shmem_vm_ops; if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && @@ -2422,8 +2440,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; /* i_mutex is held by caller */ - if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) { - if (info->seals & F_SEAL_WRITE) + if (unlikely(info->seals & (F_SEAL_GROW | + F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) return -EPERM; if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) return -EPERM; @@ -2686,7 +2705,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); /* protected by i_mutex */ - if (info->seals & F_SEAL_WRITE) { + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { error = -EPERM; goto out; } From ac8ed5f99ad0875995bb4bb6a55f00d6bddd97b8 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 12 Jan 2019 15:38:16 -0500 Subject: [PATCH 07/58] UPSTREAM: selftests/memfd: Add tests for F_SEAL_FUTURE_WRITE seal Add tests to verify sealing memfds with the F_SEAL_FUTURE_WRITE works as expected. Cc: dancol@google.com Cc: minchan@kernel.org Cc: Jann Horn Cc: John Stultz Signed-off-by: Joel Fernandes (Google) Reviewed-by: Shuah Khan Change-Id: I42a0ceb1f584a5444ec755f8c76899499bcbaddd --- tools/testing/selftests/memfd/memfd_test.c | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 10baa1652fc2..c67d32eeb668 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -54,6 +54,22 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) return fd; } +static int mfd_assert_reopen_fd(int fd_in) +{ + int r, fd; + char path[100]; + + sprintf(path, "/proc/self/fd/%d", fd_in); + + fd = open(path, O_RDWR); + if (fd < 0) { + printf("re-open of existing fd %d failed\n", fd_in); + abort(); + } + + return fd; +} + static void mfd_fail_new(const char *name, unsigned int flags) { int r; @@ -255,6 +271,25 @@ static void mfd_assert_read(int fd) munmap(p, mfd_def_size); } +/* Test that PROT_READ + MAP_SHARED mappings work. */ +static void mfd_assert_read_shared(int fd) +{ + void *p; + + /* verify PROT_READ and MAP_SHARED *is* allowed */ + p = mmap(NULL, + mfd_def_size, + PROT_READ, + MAP_SHARED, + fd, + 0); + if (p == MAP_FAILED) { + printf("mmap() failed: %m\n"); + abort(); + } + munmap(p, mfd_def_size); +} + static void mfd_assert_write(int fd) { ssize_t l; @@ -692,6 +727,44 @@ static void test_seal_write(void) close(fd); } +/* + * Test SEAL_FUTURE_WRITE + * Test whether SEAL_FUTURE_WRITE actually prevents modifications. + */ +static void test_seal_future_write(void) +{ + int fd, fd2; + void *p; + + printf("%s SEAL-FUTURE-WRITE\n", memfd_str); + + fd = mfd_assert_new("kern_memfd_seal_future_write", + mfd_def_size, + MFD_CLOEXEC | MFD_ALLOW_SEALING); + + p = mfd_assert_mmap_shared(fd); + + mfd_assert_has_seals(fd, 0); + + mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE); + mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE); + + /* read should pass, writes should fail */ + mfd_assert_read(fd); + mfd_assert_read_shared(fd); + mfd_fail_write(fd); + + fd2 = mfd_assert_reopen_fd(fd); + /* read should pass, writes should still fail */ + mfd_assert_read(fd2); + mfd_assert_read_shared(fd2); + mfd_fail_write(fd2); + + munmap(p, mfd_def_size); + close(fd2); + close(fd); +} + /* * Test SEAL_SHRINK * Test whether SEAL_SHRINK actually prevents shrinking @@ -945,6 +1018,7 @@ int main(int argc, char **argv) test_basic(); test_seal_write(); + test_seal_future_write(); test_seal_shrink(); test_seal_grow(); test_seal_resize(); From d7068618ae1fbb80fc16bd7c58798e208a696483 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 8 Jan 2019 11:44:41 +0000 Subject: [PATCH 08/58] Btrfs: fix deadlock when using free space tree due to block group creation commit a6d8654d885d7d79a3fb82da64eaa489ca332a82 upstream. When modifying the free space tree we can end up COWing one of its extent buffers which in turn might result in allocating a new chunk, which in turn can result in flushing (finish creation) of pending block groups. If that happens we can deadlock because creating a pending block group needs to update the free space tree, and if any of the updates tries to modify the same extent buffer that we are COWing, we end up in a deadlock since we try to write lock twice the same extent buffer. So fix this by skipping pending block group creation if we are COWing an extent buffer from the free space tree. This is a case missed by commit 5ce555578e091 ("Btrfs: fix deadlock when writing out free space caches"). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202173 Fixes: 5ce555578e091 ("Btrfs: fix deadlock when writing out free space caches") CC: stable@vger.kernel.org # 4.18+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fa18520529f3..7ad6f2eec711 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1051,19 +1051,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, parent_start = parent->start; /* - * If we are COWing a node/leaf from the extent, chunk or device trees, - * make sure that we do not finish block group creation of pending block - * groups. We do this to avoid a deadlock. + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. * COWing can result in allocation of a new chunk, and flushing pending * block groups (btrfs_create_pending_block_groups()) can be triggered * when finishing allocation of a new chunk. Creation of a pending block - * group modifies the extent, chunk and device trees, therefore we could - * deadlock with ourselves since we are holding a lock on an extent - * buffer that btrfs_create_pending_block_groups() may try to COW later. + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. */ if (root == fs_info->extent_root || root == fs_info->chunk_root || - root == fs_info->dev_root) + root == fs_info->dev_root || + root == fs_info->free_space_root) trans->can_flush_pending_bgs = false; cow = btrfs_alloc_tree_block(trans, root, parent_start, From ce588054801d03ccb956e088e679baed1cf34105 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 2 Jan 2019 20:12:46 -0600 Subject: [PATCH 09/58] staging: rtl8188eu: Fix module loading from tasklet for CCMP encryption commit 84cad97a717f5749a0236abd5ce68da582ea074f upstream. Commit 6bd082af7e36 ("staging:r8188eu: use lib80211 CCMP decrypt") causes scheduling while atomic bugs followed by a hard freeze whenever the driver tries to connect to a CCMP-encrypted network. Experimentation showed that the freezes were eliminated when module lib80211 was preloaded, which can be forced by calling lib80211_get_crypto_ops() directly rather than indirectly through try_then_request_module(). With this change, no BUG messages are logged. Fixes: 6bd082af7e36 ("staging:r8188eu: use lib80211 CCMP decrypt") Cc: Stable # v4.17+ Reported-and-tested-by: Michael Straube Cc: Ivan Safonov Signed-off-by: Larry Finger Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_security.c b/drivers/staging/rtl8188eu/core/rtw_security.c index 2a48b09ea9ae..c44d657cac89 100644 --- a/drivers/staging/rtl8188eu/core/rtw_security.c +++ b/drivers/staging/rtl8188eu/core/rtw_security.c @@ -1292,7 +1292,7 @@ u32 rtw_aes_decrypt(struct adapter *padapter, u8 *precvframe) struct sk_buff *skb = ((struct recv_frame *)precvframe)->pkt; void *crypto_private = NULL; u8 *key, *pframe = skb->data; - struct lib80211_crypto_ops *crypto_ops = try_then_request_module(lib80211_get_crypto_ops("CCMP"), "lib80211_crypt_ccmp"); + struct lib80211_crypto_ops *crypto_ops = lib80211_get_crypto_ops("CCMP"); struct security_priv *psecuritypriv = &padapter->securitypriv; char iv[8], icv[8]; From f64231cb4f133c3638f210b81bae9fbffc0841fa Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 2 Jan 2019 20:12:47 -0600 Subject: [PATCH 10/58] staging: rtl8188eu: Fix module loading from tasklet for WEP encryption commit 7775665aadc48a562051834a73519129bf717d73 upstream. Commit 2b2ea09e74a5 ("staging:r8188eu: Use lib80211 to decrypt WEP-frames") causes scheduling while atomic bugs followed by a hard freeze whenever the driver tries to connect to a WEP-encrypted network. Experimentation showed that the freezes were eliminated when module lib80211 was preloaded, which can be forced by calling lib80211_get_crypto_ops() directly rather than indirectly through try_then_request_module(). With this change, no BUG messages are logged. Fixes: 2b2ea09e74a5 ("staging:r8188eu: Use lib80211 to decrypt WEP-frames") Cc: Stable # v4.17+ Cc: Michael Straube Cc: Ivan Safonov Signed-off-by: Larry Finger Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_security.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_security.c b/drivers/staging/rtl8188eu/core/rtw_security.c index c44d657cac89..470ea2c0c433 100644 --- a/drivers/staging/rtl8188eu/core/rtw_security.c +++ b/drivers/staging/rtl8188eu/core/rtw_security.c @@ -154,7 +154,7 @@ void rtw_wep_encrypt(struct adapter *padapter, u8 *pxmitframe) pframe = ((struct xmit_frame *)pxmitframe)->buf_addr + hw_hdr_offset; - crypto_ops = try_then_request_module(lib80211_get_crypto_ops("WEP"), "lib80211_crypt_wep"); + crypto_ops = lib80211_get_crypto_ops("WEP"); if (!crypto_ops) return; @@ -210,7 +210,7 @@ int rtw_wep_decrypt(struct adapter *padapter, u8 *precvframe) void *crypto_private = NULL; int status = _SUCCESS; const int keyindex = prxattrib->key_index; - struct lib80211_crypto_ops *crypto_ops = try_then_request_module(lib80211_get_crypto_ops("WEP"), "lib80211_crypt_wep"); + struct lib80211_crypto_ops *crypto_ops = lib80211_get_crypto_ops("WEP"); char iv[4], icv[4]; if (!crypto_ops) { From 4ecbd88bf9949cc705e49fde3db450ad2f81fae3 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 9 Jan 2019 10:42:36 +0000 Subject: [PATCH 11/58] cpufreq: scmi: Fix frequency invariance in slow path commit 0e141d1c65c1dd31c914eb2e11651adcc1a15912 upstream. The scmi-cpufreq driver calls the arch_set_freq_scale() callback on frequency changes to provide scale-invariant load-tracking signals to the scheduler. However, in the slow path, it does so while specifying the current and max frequencies in different units, hence resulting in a broken freq_scale factor. Fix this by passing all frequencies in KHz, as stored in the CPUFreq frequency table. Fixes: 99d6bdf33877 (cpufreq: add support for CPU DVFS based on SCMI message protocol) Signed-off-by: Quentin Perret Acked-by: Viresh Kumar Acked-by: Sudeep Holla Cc: 4.17+ # v4.17+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/scmi-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 50b1551ba894..3f0693439486 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -52,9 +52,9 @@ scmi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) int ret; struct scmi_data *priv = policy->driver_data; struct scmi_perf_ops *perf_ops = handle->perf_ops; - u64 freq = policy->freq_table[index].frequency * 1000; + u64 freq = policy->freq_table[index].frequency; - ret = perf_ops->freq_set(handle, priv->domain_id, freq, false); + ret = perf_ops->freq_set(handle, priv->domain_id, freq * 1000, false); if (!ret) arch_set_freq_scale(policy->related_cpus, freq, policy->cpuinfo.max_freq); From 4bef2bacb1c51ca70ebcbfc10359171785b7eddb Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Tue, 11 Dec 2018 00:37:25 +0800 Subject: [PATCH 12/58] x86, modpost: Replace last remnants of RETPOLINE with CONFIG_RETPOLINE commit e4f358916d528d479c3c12bd2fd03f2d5a576380 upstream. Commit 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") replaced the RETPOLINE define with CONFIG_RETPOLINE checks. Remove the remaining pieces. [ bp: Massage commit message. ] Fixes: 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") Signed-off-by: WANG Chao Signed-off-by: Borislav Petkov Reviewed-by: Zhenzhong Duan Reviewed-by: Masahiro Yamada Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Woodhouse Cc: Geert Uytterhoeven Cc: Jessica Yu Cc: Jiri Kosina Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Luc Van Oostenryck Cc: Michal Marek Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Tim Chen Cc: Vasily Gorbik Cc: linux-kbuild@vger.kernel.org Cc: srinivas.eeda@oracle.com Cc: stable Cc: x86-ml Link: https://lkml.kernel.org/r/20181210163725.95977-1-chao.wang@ucloud.cn Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- include/linux/compiler-gcc.h | 2 +- include/linux/module.h | 2 +- scripts/mod/modpost.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index abb92c341693..807d06a7acac 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -213,7 +213,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = SPECTRE_V2_USER_NONE; -#ifdef RETPOLINE +#ifdef CONFIG_RETPOLINE static bool spectre_v2_bad_module; bool retpoline_module_ok(bool has_retpoline) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 4d36b27214fd..0242f6eec4ea 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -75,7 +75,7 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif -#ifdef RETPOLINE +#ifdef CONFIG_RETPOLINE #define __noretpoline __attribute__((indirect_branch("keep"))) #endif diff --git a/include/linux/module.h b/include/linux/module.h index e19ae08c7fb8..904f94628132 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -818,7 +818,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef RETPOLINE +#ifdef CONFIG_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 0d998c54564d..5a5b3780456f 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2157,7 +2157,7 @@ static void add_intree_flag(struct buffer *b, int is_intree) /* Cannot check for assembler */ static void add_retpoline(struct buffer *b) { - buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "\n#ifdef CONFIG_RETPOLINE\n"); buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); buf_printf(b, "#endif\n"); } From 50a67b1a85b9b2b6b1475a0277e9109fe20b9625 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 3 Jan 2019 15:53:39 +0800 Subject: [PATCH 13/58] ALSA: hda/realtek - Support Dell headset mode for New AIO platform commit c2a7c55a04065c3b0c32d23b099db7ea1dbf6250 upstream. Dell has new platform for ALC274. This will support to enable headset mode. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 854d63c01dd2..5aa842c86bcc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6503,6 +6503,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 0fd36f024a707003a30b7c90cfebfe4aa1141773 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 9 Jan 2019 16:23:37 +0800 Subject: [PATCH 14/58] ALSA: hda/realtek - Add unplug function into unplug state of Headset Mode for ALC225 commit 4d4b0c52bde470c379f5d168d5c139ad866cb808 upstream. Forgot to add unplug function to unplug state of headset mode for ALC225. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5aa842c86bcc..04072caf5846 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4102,6 +4102,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0295: case 0x10ec0289: case 0x10ec0299: + alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); break; case 0x10ec0867: From 045e3d2f1216a930e64134369f9b7d3c9f10b4a9 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 9 Jan 2019 17:05:24 +0800 Subject: [PATCH 15/58] ALSA: hda/realtek - Disable headset Mic VREF for headset mode of ALC225 commit d1dd42110d2727e81b9265841a62bc84c454c3a2 upstream. Disable Headset Mic VREF for headset mode of ALC225. This will be controlled by coef bits of headset mode functions. [ Fixed a compile warning and code simplification -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 04072caf5846..8b9f2487969b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5381,6 +5381,13 @@ static void alc285_fixup_invalidate_dacs(struct hda_codec *codec, snd_hda_override_wcaps(codec, 0x03, 0); } +static void alc_fixup_disable_mic_vref(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ); +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -5493,6 +5500,7 @@ enum { ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, + ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC295_FIXUP_DISABLE_DAC3, ALC280_FIXUP_HP_HEADSET_MIC, @@ -6192,6 +6200,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC225_FIXUP_DISABLE_MIC_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_mic_vref, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -6201,7 +6215,7 @@ static const struct hda_fixup alc269_fixups[] = { {} }, .chained = true, - .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + .chain_id = ALC225_FIXUP_DISABLE_MIC_VREF }, [ALC280_FIXUP_HP_HEADSET_MIC] = { .type = HDA_FIXUP_FUNC, From d1130682d1270495f49282535854221bbdf20638 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 19 Dec 2018 22:49:09 +0000 Subject: [PATCH 16/58] CIFS: Fix adjustment of credits for MTU requests commit b983f7e92348d7e7d091db1b78b7915e9dd3d63a upstream. Currently for MTU requests we allocate maximum possible credits in advance and then adjust them according to the request size. While we were adjusting the number of credits belonging to the server, we were skipping adjustment of credits belonging to the request. This patch fixes it by setting request credits to CreditCharge field value of SMB2 packet header. Also ask 1 credit more for async read and write operations to increase parallelism and match the behavior of other operations. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index f54d07bda067..dba986524917 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3185,12 +3185,14 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = shdr->CreditCharge; + shdr->CreditRequest = + cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); spin_lock(&server->req_lock); server->credits += rdata->credits - le16_to_cpu(shdr->CreditCharge); spin_unlock(&server->req_lock); wake_up(&server->request_q); + rdata->credits = le16_to_cpu(shdr->CreditCharge); flags |= CIFS_HAS_CREDITS; } @@ -3462,12 +3464,14 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = shdr->CreditCharge; + shdr->CreditRequest = + cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); spin_lock(&server->req_lock); server->credits += wdata->credits - le16_to_cpu(shdr->CreditCharge); spin_unlock(&server->req_lock); wake_up(&server->request_q); + wdata->credits = le16_to_cpu(shdr->CreditCharge); flags |= CIFS_HAS_CREDITS; } From c3606c64678369dc108f397109569cfa0ce0c101 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 3 Jan 2019 16:45:13 -0800 Subject: [PATCH 17/58] CIFS: Do not set credits to 1 if the server didn't grant anything commit 33fa5c8b8a7dbe6353a56eaa654b790348890d42 upstream. Currently we reset the number of total credits granted by the server to 1 if the server didn't grant us anything int the response. This violates the SMB3 protocol - we need to trust the server and use the credit values from the response. Fix this by removing the corresponding code. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/transport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 333729cf46cd..a610381f8140 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -885,8 +885,6 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, for (i = 0; i < num_rqst; i++) if (midQ[i]->resp_buf) credits += ses->server->ops->get_credits(midQ[i]); - if (!credits) - credits = 1; for (i = 0; i < num_rqst; i++) { if (rc < 0) From d2f76f6f9fa963d337e72616f6e06b8bbac5eb7f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 10 Jan 2019 11:27:28 -0800 Subject: [PATCH 18/58] CIFS: Do not hide EINTR after sending network packets commit ee13919c2e8d1f904e035ad4b4239029a8994131 upstream. Currently we hide EINTR code returned from sock_sendmsg() and return 0 instead. This makes a caller think that we successfully completed the network operation which is not true. Fix this by properly returning EINTR to callers. Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index a610381f8140..005c2af9d696 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -378,7 +378,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (rc < 0 && rc != -EINTR) cifs_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else + else if (rc > 0) rc = 0; return rc; From 7dcc5b36ea7f5f5d15907e439befa14cecffc9f1 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sat, 22 Dec 2018 12:40:05 -0800 Subject: [PATCH 19/58] CIFS: Fix credit computation for compounded requests commit 8544f4aa9dd19a04d1244dae10feecc813ccf175 upstream. In SMB3 protocol every part of the compound chain consumes credits individually, so we need to call wait_for_free_credits() for each of the PDUs in the chain. If an operation is interrupted, we must ensure we return all credits taken from the server structure back. Without this patch server can sometimes disconnect the session due to credit mismatches, especially when first operation(s) are large writes. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/transport.c | 59 +++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 005c2af9d696..66348b3d28e6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -786,7 +786,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, int i, j, rc = 0; int timeout, optype; struct mid_q_entry *midQ[MAX_COMPOUND]; - unsigned int credits = 0; + bool cancelled_mid[MAX_COMPOUND] = {false}; + unsigned int credits[MAX_COMPOUND] = {0}; char *buf; timeout = flags & CIFS_TIMEOUT_MASK; @@ -804,13 +805,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, return -ENOENT; /* - * Ensure that we do not send more than 50 overlapping requests - * to the same server. We may make this configurable later or - * use ses->maxReq. + * Ensure we obtain 1 credit per request in the compound chain. + * It can be optimized further by waiting for all the credits + * at once but this can wait long enough if we don't have enough + * credits due to some heavy operations in progress or the server + * not granting us much, so a fallback to the current approach is + * needed anyway. */ - rc = wait_for_free_request(ses->server, timeout, optype); - if (rc) - return rc; + for (i = 0; i < num_rqst; i++) { + rc = wait_for_free_request(ses->server, timeout, optype); + if (rc) { + /* + * We haven't sent an SMB packet to the server yet but + * we already obtained credits for i requests in the + * compound chain - need to return those credits back + * for future use. Note that we need to call add_credits + * multiple times to match the way we obtained credits + * in the first place and to account for in flight + * requests correctly. + */ + for (j = 0; j < i; j++) + add_credits(ses->server, 1, optype); + return rc; + } + credits[i] = 1; + } /* * Make sure that we sign in the same order that we send on this socket @@ -826,8 +845,10 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, for (j = 0; j < i; j++) cifs_delete_mid(midQ[j]); mutex_unlock(&ses->server->srv_mutex); + /* Update # of requests on wire to server */ - add_credits(ses->server, 1, optype); + for (j = 0; j < num_rqst; j++) + add_credits(ses->server, credits[j], optype); return PTR_ERR(midQ[i]); } @@ -874,17 +895,16 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { midQ[i]->mid_flags |= MID_WAIT_CANCELLED; midQ[i]->callback = DeleteMidQEntry; - spin_unlock(&GlobalMid_Lock); - add_credits(ses->server, 1, optype); - return rc; + cancelled_mid[i] = true; } spin_unlock(&GlobalMid_Lock); } } for (i = 0; i < num_rqst; i++) - if (midQ[i]->resp_buf) - credits += ses->server->ops->get_credits(midQ[i]); + if (!cancelled_mid[i] && midQ[i]->resp_buf + && (midQ[i]->mid_state == MID_RESPONSE_RECEIVED)) + credits[i] = ses->server->ops->get_credits(midQ[i]); for (i = 0; i < num_rqst; i++) { if (rc < 0) @@ -892,8 +912,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, rc = cifs_sync_mid_result(midQ[i], ses->server); if (rc != 0) { - add_credits(ses->server, credits, optype); - return rc; + /* mark this mid as cancelled to not free it below */ + cancelled_mid[i] = true; + goto out; } if (!midQ[i]->resp_buf || @@ -940,9 +961,11 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. */ - for (i = 0; i < num_rqst; i++) - cifs_delete_mid(midQ[i]); - add_credits(ses->server, credits, optype); + for (i = 0; i < num_rqst; i++) { + if (!cancelled_mid[i]) + cifs_delete_mid(midQ[i]); + add_credits(ses->server, credits[i], optype); + } return rc; } From 2a71a47e03ffa7aa68893406f8134977cb164b59 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 8 Jan 2019 18:30:57 +0000 Subject: [PATCH 20/58] cifs: Fix potential OOB access of lock element array commit b9a74cde94957d82003fb9f7ab4777938ca851cd upstream. If maxBuf is small but non-zero, it could result in a zero sized lock element array which we would then try and access OOB. Signed-off-by: Ross Lagerwall Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 8 ++++---- fs/cifs/smb2file.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8d41ca7bfcf1..7b637fc27990 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1120,10 +1120,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { free_xid(xid); return -EINVAL; } @@ -1460,10 +1460,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) return -EINVAL; max_num = (max_buf - sizeof(struct smb_hdr)) / diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 4ed10dd086e6..2fc3d31967ee 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -122,10 +122,10 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < sizeof(struct smb2_lock_element)) return -EINVAL; max_num = max_buf / sizeof(struct smb2_lock_element); From 51b2a8e263cb219538b2ee48dc37be2af7c65559 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 28 Dec 2018 16:15:41 +0100 Subject: [PATCH 21/58] usb: cdc-acm: send ZLP for Telit 3G Intel based modems commit 34aabf918717dd14e05051896aaecd3b16b53d95 upstream. Telit 3G Intel based modems require zero packet to be sent if out data size is equal to the endpoint max packet size. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 2886b7b477c7..08b8aa5299b5 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1880,6 +1880,13 @@ static const struct usb_device_id acm_ids[] = { .driver_info = IGNORE_DEVICE, }, + { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */ + .driver_info = SEND_ZERO_PACKET, + }, + { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */ + .driver_info = SEND_ZERO_PACKET, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, From 621ecb93578a8eedef7f1bb94dfa6729e99ed14a Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 3 Jan 2019 11:26:17 +0800 Subject: [PATCH 22/58] USB: storage: don't insert sane sense for SPC3+ when bad sense specified commit c5603d2fdb424849360fe7e3f8c1befc97571b8c upstream. Currently the code will set US_FL_SANE_SENSE flag unconditionally if device claims SPC3+, however we should allow US_FL_BAD_SENSE flag to prevent this behavior, because SMI SM3350 UFS-USB bridge controller, which claims SPC4, will show strange behavior with 96-byte sense (put the chip into a wrong state that cannot read/write anything). Check the presence of US_FL_BAD_SENSE when assuming US_FL_SANE_SENSE on SPC4+ devices. Signed-off-by: Icenowy Zheng Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index e227bb5b794f..101ebac43c87 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -235,8 +235,12 @@ static int slave_configure(struct scsi_device *sdev) if (!(us->fflags & US_FL_NEEDS_CAP16)) sdev->try_rc_10_first = 1; - /* assume SPC3 or latter devices support sense size > 18 */ - if (sdev->scsi_level > SCSI_SPC_2) + /* + * assume SPC3 or latter devices support sense size > 18 + * unless US_FL_BAD_SENSE quirk is specified. + */ + if (sdev->scsi_level > SCSI_SPC_2 && + !(us->fflags & US_FL_BAD_SENSE)) us->fflags |= US_FL_SANE_SENSE; /* From 12388881ce0d5ae7113b2730e424df9d077fe9d8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 3 Jan 2019 11:26:18 +0800 Subject: [PATCH 23/58] USB: storage: add quirk for SMI SM3350 commit 0a99cc4b8ee83885ab9f097a3737d1ab28455ac0 upstream. The SMI SM3350 USB-UFS bridge controller cannot handle long sense request correctly and will make the chip refuse to do read/write when requested long sense. Add a bad sense quirk for it. Signed-off-by: Icenowy Zheng Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index f7f83b21dc74..ea0d27a94afe 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1265,6 +1265,18 @@ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_CAPACITY ), +/* + * Reported by Icenowy Zheng + * The SMI SM3350 USB-UFS bridge controller will enter a wrong state + * that do not process read/write command if a long sense is requested, + * so force to use 18-byte sense. + */ +UNUSUAL_DEV( 0x090c, 0x3350, 0x0000, 0xffff, + "SMI", + "SM3350 UFS-to-USB-Mass-Storage bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BAD_SENSE ), + /* * Reported by Paul Hartman * This card reader returns "Illegal Request, Logical Block Address From 33c96bc68ff377064b18cd4bc821095b96a41e05 Mon Sep 17 00:00:00 2001 From: Jack Stocker Date: Thu, 3 Jan 2019 21:56:53 +0000 Subject: [PATCH 24/58] USB: Add USB_QUIRK_DELAY_CTRL_MSG quirk for Corsair K70 RGB commit 3483254b89438e60f719937376c5e0ce2bc46761 upstream. To match the Corsair Strafe RGB, the Corsair K70 RGB also requires USB_QUIRK_DELAY_CTRL_MSG to completely resolve boot connection issues discussed here: https://github.com/ckb-next/ckb-next/issues/42. Otherwise roughly 1 in 10 boots the keyboard will fail to be detected. Patch that applied delay control quirk for Corsair Strafe RGB: cb88a0588717 ("usb: quirks: add control message delay for 1b1c:1b20") Previous K70 RGB patch to add delay-init quirk: 7a1646d92257 ("Add delay-init quirk for Corsair K70 RGB keyboards") Signed-off-by: Jack Stocker Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 514c5214ddb2..8bc35d53408b 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -394,7 +394,8 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a40, 0x0101), .driver_info = USB_QUIRK_HUB_SLOW_RESET }, /* Corsair K70 RGB */ - { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair Strafe */ { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | From f928ca3917478d5f01e49c96f172dbe4c843d4c1 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 8 Jan 2019 15:23:00 -0800 Subject: [PATCH 25/58] slab: alien caches must not be initialized if the allocation of the alien cache failed commit 09c2e76ed734a1d36470d257a778aaba28e86531 upstream. Callers of __alloc_alien() check for NULL. We must do the same check in __alloc_alien_cache to avoid NULL pointer dereferences on allocation failures. Link: http://lkml.kernel.org/r/010001680f42f192-82b4e12e-1565-4ee0-ae1f-1e98974906aa-000000@email.amazonses.com Fixes: 49dfc304ba241 ("slab: use the lock on alien_cache, instead of the lock on array_cache") Fixes: c8522a3a5832b ("Slab: introduce alloc_alien") Signed-off-by: Christoph Lameter Reported-by: syzbot+d6ed4ec679652b4fd4e4@syzkaller.appspotmail.com Reviewed-by: Andrew Morton Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index d73c7a4820a4..fad6839e8eab 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -679,8 +679,10 @@ static struct alien_cache *__alloc_alien_cache(int node, int entries, struct alien_cache *alc = NULL; alc = kmalloc_node(memsize, gfp, node); - init_arraycache(&alc->ac, entries, batch); - spin_lock_init(&alc->lock); + if (alc) { + init_arraycache(&alc->ac, entries, batch); + spin_lock_init(&alc->lock); + } return alc; } From 8a4b6e8cb796cc6e90f4ae4f4985ae64c17c41d4 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 8 Jan 2019 15:23:04 -0800 Subject: [PATCH 26/58] mm/usercopy.c: no check page span for stack objects commit 7bff3c06997374fb9b9991536a547b840549a813 upstream. It is easy to trigger this with CONFIG_HARDENED_USERCOPY_PAGESPAN=y, usercopy: Kernel memory overwrite attempt detected to spans multiple pages (offset 0, size 23)! kernel BUG at mm/usercopy.c:102! For example, print_worker_info char name[WQ_NAME_LEN] = { }; char desc[WORKER_DESC_LEN] = { }; probe_kernel_read(name, wq->name, sizeof(name) - 1); probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); __copy_from_user_inatomic check_object_size check_heap_object check_page_span This is because on-stack variables could cross PAGE_SIZE boundary, and failed this check, if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == ((unsigned long)end & (unsigned long)PAGE_MASK))) ptr = FFFF889007D7EFF8 end = FFFF889007D7F00E Hence, fix it by checking if it is a stack object first. [keescook@chromium.org: improve comments after reorder] Link: http://lkml.kernel.org/r/20190103165151.GA32845@beast Link: http://lkml.kernel.org/r/20181231030254.99441-1-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Kees Cook Acked-by: Kees Cook Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/usercopy.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 852eb4e53f06..14faadcedd06 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -247,7 +247,8 @@ static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks); /* * Validates that the given object is: * - not bogus address - * - known-safe heap or stack object + * - fully contained by stack (or stack frame, when available) + * - fully within SLAB object (or object whitelist area, when available) * - not in kernel text */ void __check_object_size(const void *ptr, unsigned long n, bool to_user) @@ -262,9 +263,6 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) /* Check for invalid addresses. */ check_bogus_address((const unsigned long)ptr, n, to_user); - /* Check for bad heap object. */ - check_heap_object(ptr, n, to_user); - /* Check for bad stack object. */ switch (check_stack_object(ptr, n)) { case NOT_STACK: @@ -282,6 +280,9 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) usercopy_abort("process stack", NULL, to_user, 0, n); } + /* Check for bad heap object. */ + check_heap_object(ptr, n, to_user); + /* Check for object in kernel to avoid text exposure. */ check_kernel_text_object((const unsigned long)ptr, n, to_user); } From 97b02b6324666dcf00467efec75b8928151f8654 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 8 Jan 2019 15:23:07 -0800 Subject: [PATCH 27/58] mm, memcg: fix reclaim deadlock with writeback commit 63f3655f950186752236bb88a22f8252c11ce394 upstream. Liu Bo has experienced a deadlock between memcg (legacy) reclaim and the ext4 writeback task1: wait_on_page_bit+0x82/0xa0 shrink_page_list+0x907/0x960 shrink_inactive_list+0x2c7/0x680 shrink_node_memcg+0x404/0x830 shrink_node+0xd8/0x300 do_try_to_free_pages+0x10d/0x330 try_to_free_mem_cgroup_pages+0xd5/0x1b0 try_charge+0x14d/0x720 memcg_kmem_charge_memcg+0x3c/0xa0 memcg_kmem_charge+0x7e/0xd0 __alloc_pages_nodemask+0x178/0x260 alloc_pages_current+0x95/0x140 pte_alloc_one+0x17/0x40 __pte_alloc+0x1e/0x110 alloc_set_pte+0x5fe/0xc20 do_fault+0x103/0x970 handle_mm_fault+0x61e/0xd10 __do_page_fault+0x252/0x4d0 do_page_fault+0x30/0x80 page_fault+0x28/0x30 task2: __lock_page+0x86/0xa0 mpage_prepare_extent_to_map+0x2e7/0x310 [ext4] ext4_writepages+0x479/0xd60 do_writepages+0x1e/0x30 __writeback_single_inode+0x45/0x320 writeback_sb_inodes+0x272/0x600 __writeback_inodes_wb+0x92/0xc0 wb_writeback+0x268/0x300 wb_workfn+0xb4/0x390 process_one_work+0x189/0x420 worker_thread+0x4e/0x4b0 kthread+0xe6/0x100 ret_from_fork+0x41/0x50 He adds "task1 is waiting for the PageWriteback bit of the page that task2 has collected in mpd->io_submit->io_bio, and tasks2 is waiting for the LOCKED bit the page which tasks1 has locked" More precisely task1 is handling a page fault and it has a page locked while it charges a new page table to a memcg. That in turn hits a memory limit reclaim and the memcg reclaim for legacy controller is waiting on the writeback but that is never going to finish because the writeback itself is waiting for the page locked in the #PF path. So this is essentially ABBA deadlock: lock_page(A) SetPageWriteback(A) unlock_page(A) lock_page(B) lock_page(B) pte_alloc_pne shrink_page_list wait_on_page_writeback(A) SetPageWriteback(B) unlock_page(B) # flush A, B to clear the writeback This accumulating of more pages to flush is used by several filesystems to generate a more optimal IO patterns. Waiting for the writeback in legacy memcg controller is a workaround for pre-mature OOM killer invocations because there is no dirty IO throttling available for the controller. There is no easy way around that unfortunately. Therefore fix this specific issue by pre-allocating the page table outside of the page lock. We have that handy infrastructure for that already so simply reuse the fault-around pattern which already does this. There are probably other hidden __GFP_ACCOUNT | GFP_KERNEL allocations from under a fs page locked but they should be really rare. I am not aware of a better solution unfortunately. [akpm@linux-foundation.org: fix mm/memory.c:__do_fault()] [akpm@linux-foundation.org: coding-style fixes] [mhocko@kernel.org: enhance comment, per Johannes] Link: http://lkml.kernel.org/r/20181214084948.GA5624@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20181213092221.27270-1-mhocko@kernel.org Fixes: c3b94f44fcb0 ("memcg: further prevent OOM with too many dirty pages") Signed-off-by: Michal Hocko Reported-by: Liu Bo Debugged-by: Liu Bo Acked-by: Kirill A. Shutemov Acked-by: Johannes Weiner Reviewed-by: Liu Bo Cc: Jan Kara Cc: Dave Chinner Cc: Theodore Ts'o Cc: Vladimir Davydov Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 5c5df53dbdf9..281172540a9c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3237,6 +3237,29 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; + /* + * Preallocate pte before we take page_lock because this might lead to + * deadlocks for memcg reclaim which waits for pages under writeback: + * lock_page(A) + * SetPageWriteback(A) + * unlock_page(A) + * lock_page(B) + * lock_page(B) + * pte_alloc_pne + * shrink_page_list + * wait_on_page_writeback(A) + * SetPageWriteback(B) + * unlock_page(B) + * # flush A, B to clear the writeback + */ + if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { + vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, + vmf->address); + if (!vmf->prealloc_pte) + return VM_FAULT_OOM; + smp_wmb(); /* See comment in __pte_alloc() */ + } + ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) From fec7361193b4b14e5ff6b7a824cae703658f5534 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 30 Dec 2018 18:25:00 +0100 Subject: [PATCH 28/58] ACPI: power: Skip duplicate power resource references in _PRx commit 7d7b467cb95bf29597b417d4990160d4ea6d69b9 upstream. Some ACPI tables contain duplicate power resource references like this: Name (_PR0, Package (0x04) // _PR0: Power Resources for D0 { P28P, P18P, P18P, CLK4 }) This causes a WARN_ON in sysfs_add_link_to_group() because we end up adding a link to the same acpi_device twice: sysfs: cannot create duplicate filename '/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/808622C1:00/OVTI2680:00/power_resources_D0/LNXPOWER:0a' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.12-301.fc29.x86_64 #1 Hardware name: Insyde CherryTrail/Type2 - Board Product Name, BIOS jumperx.T87.KFBNEEA02 04/13/2016 Call Trace: dump_stack+0x5c/0x80 sysfs_warn_dup.cold.3+0x17/0x2a sysfs_do_create_link_sd.isra.2+0xa9/0xb0 sysfs_add_link_to_group+0x30/0x50 acpi_power_expose_list+0x74/0xa0 acpi_power_add_remove_device+0x50/0xa0 acpi_add_single_object+0x26b/0x5f0 acpi_bus_check_add+0xc4/0x250 ... To address this issue, make acpi_extract_power_resources() check for duplicates and simply skip them when found. Cc: All applicable Signed-off-by: Hans de Goede [ rjw: Subject & changelog, comments ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/power.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 1b475bc1ae16..665e93ca0b40 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -131,6 +131,23 @@ void acpi_power_resources_list_free(struct list_head *list) } } +static bool acpi_power_resource_is_dup(union acpi_object *package, + unsigned int start, unsigned int i) +{ + acpi_handle rhandle, dup; + unsigned int j; + + /* The caller is expected to check the package element types */ + rhandle = package->package.elements[i].reference.handle; + for (j = start; j < i; j++) { + dup = package->package.elements[j].reference.handle; + if (dup == rhandle) + return true; + } + + return false; +} + int acpi_extract_power_resources(union acpi_object *package, unsigned int start, struct list_head *list) { @@ -150,6 +167,11 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start, err = -ENODEV; break; } + + /* Some ACPI tables contain duplicate power resource references */ + if (acpi_power_resource_is_dup(package, start, i)) + continue; + err = acpi_add_power_resource(rhandle); if (err) break; From 6fd3d197510564c84c31e060fef2b8ce74c2b95d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 4 Jan 2019 23:10:54 +0100 Subject: [PATCH 29/58] ACPI / PMIC: xpower: Fix TS-pin current-source handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2b531d71595d2b5b12782a49b23c335869e2621e upstream. The current-source used for the battery temp-sensor (TS) is shared with the GPADC. For proper fuel-gauge and charger operation the TS current-source needs to be permanently on. But to read the GPADC we need to temporary switch the TS current-source to ondemand, so that the GPADC can use it, otherwise we will always read an all 0 value. The switching from on to on-ondemand is not necessary when the TS current-source is off (this happens on devices which do not have a TS). Prior to this commit there were 2 issues with our handling of the TS current-source switching: 1) We were writing hardcoded values to the ADC TS pin-ctrl register, overwriting various other unrelated bits. Specifically we were overwriting the current-source setting for the TS and GPIO0 pins, forcing it to 80ųA independent of its original setting. On a Chuwi Vi10 tablet this was causing us to get a too high adc value (due to a too high current-source) resulting in acpi_lpat_raw_to_temp() returning -ENOENT, resulting in: ACPI Error: AE_ERROR, Returned by Handler for [UserDefinedRegion] ACPI Error: Method parse/execution failed \_SB.SXP1._TMP, AE_ERROR This commit fixes this by using regmap_update_bits to change only the relevant bits. 2) At the end of intel_xpower_pmic_get_raw_temp() we were unconditionally enabling the TS current-source even on devices where the TS-pin is not used and the current-source thus was off on entry of the function. This commit fixes this by checking if the TS current-source is off when entering intel_xpower_pmic_get_raw_temp() and if so it is left as is. Fixes: 58eefe2f3f53 (ACPI / PMIC: xpower: Do pinswitch ... reading GPADC) Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/pmic/intel_pmic_xpower.c | 41 +++++++++++++++++++++------ 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c index 316e55174aa9..bb5391f59b8b 100644 --- a/drivers/acpi/pmic/intel_pmic_xpower.c +++ b/drivers/acpi/pmic/intel_pmic_xpower.c @@ -27,8 +27,11 @@ #define GPI1_LDO_ON (3 << 0) #define GPI1_LDO_OFF (4 << 0) -#define AXP288_ADC_TS_PIN_GPADC 0xf2 -#define AXP288_ADC_TS_PIN_ON 0xf3 +#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0) +#define AXP288_ADC_TS_CURRENT_OFF (0 << 0) +#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0) +#define AXP288_ADC_TS_CURRENT_ON_ONDEMAND (2 << 0) +#define AXP288_ADC_TS_CURRENT_ON (3 << 0) static struct pmic_table power_table[] = { { @@ -211,22 +214,44 @@ static int intel_xpower_pmic_update_power(struct regmap *regmap, int reg, */ static int intel_xpower_pmic_get_raw_temp(struct regmap *regmap, int reg) { + int ret, adc_ts_pin_ctrl; u8 buf[2]; - int ret; - ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, - AXP288_ADC_TS_PIN_GPADC); + /* + * The current-source used for the battery temp-sensor (TS) is shared + * with the GPADC. For proper fuel-gauge and charger operation the TS + * current-source needs to be permanently on. But to read the GPADC we + * need to temporary switch the TS current-source to ondemand, so that + * the GPADC can use it, otherwise we will always read an all 0 value. + * + * Note that the switching from on to on-ondemand is not necessary + * when the TS current-source is off (this happens on devices which + * do not use the TS-pin). + */ + ret = regmap_read(regmap, AXP288_ADC_TS_PIN_CTRL, &adc_ts_pin_ctrl); if (ret) return ret; - /* After switching to the GPADC pin give things some time to settle */ - usleep_range(6000, 10000); + if (adc_ts_pin_ctrl & AXP288_ADC_TS_CURRENT_ON_OFF_MASK) { + ret = regmap_update_bits(regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON_ONDEMAND); + if (ret) + return ret; + + /* Wait a bit after switching the current-source */ + usleep_range(6000, 10000); + } ret = regmap_bulk_read(regmap, AXP288_GP_ADC_H, buf, 2); if (ret == 0) ret = (buf[0] << 4) + ((buf[1] >> 4) & 0x0f); - regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON); + if (adc_ts_pin_ctrl & AXP288_ADC_TS_CURRENT_ON_OFF_MASK) { + regmap_update_bits(regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON); + } return ret; } From 3fdb8121e90a07c7eb5223e35b34dd5f4280f319 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 10 Jan 2019 18:41:51 +0000 Subject: [PATCH 30/58] ACPI/IORT: Fix rc_dma_get_range() commit c7777236dd8f587f6a8d6800c03df318fd4d2627 upstream. When executed for a PCI_ROOT_COMPLEX type, iort_match_node_callback() expects the opaque pointer argument to be a PCI bus device. At the moment rc_dma_get_range() passes the PCI endpoint instead of the bus, and we've been lucky to have pci_domain_nr(ptr) return 0 instead of crashing. Pass the bus device to iort_scan_node(). Fixes: 5ac65e8c8941 ("ACPI/IORT: Support address size limit for root complexes") Reported-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Signed-off-by: Lorenzo Pieralisi Reviewed-by: Eric Auger Acked-by: Robin Murphy Cc: stable@vger.kernel.org Cc: Will Deacon Cc: Hanjun Guo Cc: Sudeep Holla Cc: Catalin Marinas Cc: "Rafael J. Wysocki" Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/arm64/iort.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e938576e58cb..e48eebc27b81 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -951,9 +951,10 @@ static int rc_dma_get_range(struct device *dev, u64 *size) { struct acpi_iort_node *node; struct acpi_iort_root_complex *rc; + struct pci_bus *pbus = to_pci_dev(dev)->bus; node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX, - iort_match_node_callback, dev); + iort_match_node_callback, &pbus->dev); if (!node || node->revision < 1) return -ENODEV; From fca4eb31e09688989f1a6d023131931a14387183 Mon Sep 17 00:00:00 2001 From: Yi Zeng Date: Wed, 9 Jan 2019 15:33:07 +0800 Subject: [PATCH 31/58] i2c: dev: prevent adapter retries and timeout being set as minus value commit 6ebec961d59bccf65d08b13fc1ad4e6272a89338 upstream. If adapter->retries is set to a minus value from user space via ioctl, it will make __i2c_transfer and __i2c_smbus_xfer skip the calling to adapter->algo->master_xfer and adapter->algo->smbus_xfer that is registered by the underlying bus drivers, and return value 0 to all the callers. The bus driver will never be accessed anymore by all users, besides, the users may still get successful return value without any error or information log print out. If adapter->timeout is set to minus value from user space via ioctl, it will make the retrying loop in __i2c_transfer and __i2c_smbus_xfer always break after the the first try, due to the time_after always returns true. Signed-off-by: Yi Zeng [wsa: minor grammar updates to commit message] Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 1aca742fde4a..ccd76c71af09 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -470,9 +470,15 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) data_arg.data); } case I2C_RETRIES: + if (arg > INT_MAX) + return -EINVAL; + client->adapter->retries = arg; break; case I2C_TIMEOUT: + if (arg > INT_MAX) + return -EINVAL; + /* For historical reasons, user-space sets the timeout * value in units of 10 ms. */ From 1e235ec00be05a98bc6a0802f2314000f419f708 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 23 Dec 2018 01:31:26 +0100 Subject: [PATCH 32/58] mtd: rawnand: qcom: fix memory corruption that causes panic commit 81d9bdf59092e4755fc4307c93c4589ef0fe2e0f upstream. This patch fixes a memory corruption that occurred in the qcom-nandc driver since it was converted to nand_scan(). On boot, an affected device will panic from a NPE at a weird place: | Unable to handle kernel NULL pointer dereference at virtual address 0 | pgd = (ptrval) | [00000000] *pgd=00000000 | Internal error: Oops: 80000005 [#1] SMP ARM | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.9 #0 | Hardware name: Generic DT based system | PC is at (null) | LR is at nand_block_isbad+0x90/0xa4 | pc : [<00000000>] lr : [] psr: 80000013 | sp : cf839d40 ip : 00000000 fp : cfae9e20 | r10: cf815810 r9 : 00000000 r8 : 00000000 | r7 : 00000000 r6 : 00000000 r5 : 00000001 r4 : cf815810 | r3 : 00000000 r2 : cfae9810 r1 : ffffffff r0 : cf815810 | Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none | Control: 10c5387d Table: 8020406a DAC: 00000051 | Process swapper/0 (pid: 1, stack limit = 0x(ptrval)) | [] (nand_block_isbad) from [] | [] (allocate_partition) from [] | [] (add_mtd_partitions) from [] | [] (parse_mtd_partitions) from [] | [] (mtd_device_parse_register) from [] | [] (qcom_nandc_probe) from [] The problem is that the nand_scan()'s qcom_nand_attach_chip callback is updating the nandc->max_cwperpage from 1 to 4. This causes the sg_init_table of clear_bam_transaction() in the driver's qcom_nandc_block_bad() to memset much more than what was initially allocated by alloc_bam_transaction(). This patch restores the old behavior by reallocating the shared bam transaction alloc_bam_transaction() after the chip was identified, but before mtd_device_parse_register() (which is an alias for mtd_device_register() - see panic) gets called. This fixes the corruption and the driver is working again. Cc: stable@vger.kernel.org Fixes: 6a3cec64f18c ("mtd: rawnand: qcom: convert driver to nand_scan()") Signed-off-by: Christian Lamparter Acked-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/qcom_nandc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 8815f3e2b718..880e75f63a19 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2839,6 +2839,16 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc, if (ret) return ret; + if (nandc->props->is_bam) { + free_bam_transaction(nandc); + nandc->bam_txn = alloc_bam_transaction(nandc); + if (!nandc->bam_txn) { + dev_err(nandc->dev, + "failed to allocate bam transaction\n"); + return -ENOMEM; + } + } + ret = mtd_device_register(mtd, NULL, 0); if (ret) nand_cleanup(chip); @@ -2853,16 +2863,6 @@ static int qcom_probe_nand_devices(struct qcom_nand_controller *nandc) struct qcom_nand_host *host; int ret; - if (nandc->props->is_bam) { - free_bam_transaction(nandc); - nandc->bam_txn = alloc_bam_transaction(nandc); - if (!nandc->bam_txn) { - dev_err(nandc->dev, - "failed to allocate bam transaction\n"); - return -ENOMEM; - } - } - for_each_available_child_of_node(dn, child) { host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL); if (!host) { From 719aee2c5ba17c0c87977dad59904b5af06821f0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 7 Jan 2019 22:13:22 -0700 Subject: [PATCH 33/58] vfio/type1: Fix unmap overflow off-by-one commit 58fec830fc19208354895d9832785505046d6c01 upstream. The below referenced commit adds a test for integer overflow, but in doing so prevents the unmap ioctl from ever including the last page of the address space. Subtract one to compare to the last address of the unmap to avoid the overflow and wrap-around. Fixes: 71a7d3d78e3c ("vfio/type1: silence integer overflow warning") Link: https://bugzilla.redhat.com/show_bug.cgi?id=1662291 Cc: stable@vger.kernel.org # v4.15+ Reported-by: Pei Zhang Debugged-by: Peter Xu Reviewed-by: Dan Carpenter Reviewed-by: Peter Xu Tested-by: Peter Xu Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d9fd3188615d..64cbc2d007c9 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -878,7 +878,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, return -EINVAL; if (!unmap->size || unmap->size & mask) return -EINVAL; - if (unmap->iova + unmap->size < unmap->iova || + if (unmap->iova + unmap->size - 1 < unmap->iova || unmap->size > SIZE_MAX) return -EINVAL; From c051be2b26212780e0a9acddb4ebfe9bc34ba74f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Dec 2018 10:08:46 -0500 Subject: [PATCH 34/58] drm/amdgpu: Add new VegaM pci id commit f6653a0e0877572c87f6dab5351e7bd6b6b7100c upstream. Add a new pci id. Reviewed-by: Leo Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8e26e1ca14c6..b40e9c76af0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -753,6 +753,7 @@ static const struct pci_device_id pciidlist[] = { /* VEGAM */ {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, + {0x1002, 0x694F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, From 11637a3a383b858424807c218e1721b190afc153 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Nov 2018 22:57:32 +0000 Subject: [PATCH 35/58] PCI: dwc: Use interrupt masking instead of disabling commit 830920e065e90db318a0da98bf13a02b641eae7f upstream. The dwc driver is showing an interesting level of brokeness, as it insists on using the enable/disable set of registers to mask/unmask MSIs, meaning that an MSIs being generated while the interrupt is in that "disabled" state will simply be lost. Let's move to the mask/unmask set of registers, which offers the expected semantics. Fixes: 7c5925afbc58 ("PCI: dwc: Move MSI IRQs allocation to IRQ domains hierarchical API") Link: https://lore.kernel.org/linux-pci/20181113225734.8026-1-marc.zyngier@arm.com/ Tested-by: Niklas Cassel Tested-by: Gustavo Pimentel Tested-by: Stanimir Varbanov Signed-off-by: Marc Zyngier [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../pci/controller/dwc/pcie-designware-host.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 29a05759a294..0f81b7169147 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -168,8 +168,8 @@ static void dw_pci_bottom_mask(struct irq_data *data) bit = data->hwirq % MAX_MSI_IRQS_PER_CTRL; pp->irq_status[ctrl] &= ~(1 << bit); - dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, - pp->irq_status[ctrl]); + dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_MASK + res, 4, + ~pp->irq_status[ctrl]); } raw_spin_unlock_irqrestore(&pp->lock, flags); @@ -191,8 +191,8 @@ static void dw_pci_bottom_unmask(struct irq_data *data) bit = data->hwirq % MAX_MSI_IRQS_PER_CTRL; pp->irq_status[ctrl] |= 1 << bit; - dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, - pp->irq_status[ctrl]); + dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_MASK + res, 4, + ~pp->irq_status[ctrl]); } raw_spin_unlock_irqrestore(&pp->lock, flags); @@ -658,10 +658,15 @@ void dw_pcie_setup_rc(struct pcie_port *pp) num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL; /* Initialize IRQ Status array */ - for (ctrl = 0; ctrl < num_ctrls; ctrl++) - dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + + for (ctrl = 0; ctrl < num_ctrls; ctrl++) { + dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_MASK + (ctrl * MSI_REG_CTRL_BLOCK_SIZE), - 4, &pp->irq_status[ctrl]); + 4, ~0); + dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + + (ctrl * MSI_REG_CTRL_BLOCK_SIZE), + 4, ~0); + pp->irq_status[ctrl] = 0; + } /* Setup RC BARs */ dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0x00000004); From c408aac37787ab8e873ed6f04632fef2494fe0f7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Nov 2018 22:57:33 +0000 Subject: [PATCH 36/58] PCI: dwc: Take lock when ACKing an interrupt commit fce5423e4f431c71933d6c1f850b540a314aa6ee upstream. Bizarrely, there is no lock taken in the irq_ack() helper. This puts the ACK callback provided by a specific platform in a awkward situation where there is no synchronization that would be expected on other callback. Introduce the required lock, giving some level of uniformity among callbacks. Fixes: 7c5925afbc58 ("PCI: dwc: Move MSI IRQs allocation to IRQ domains hierarchical API") Link: https://lore.kernel.org/linux-pci/20181113225734.8026-1-marc.zyngier@arm.com/ Tested-by: Niklas Cassel Tested-by: Gustavo Pimentel Tested-by: Stanimir Varbanov Signed-off-by: Marc Zyngier [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-designware-host.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 0f81b7169147..610a5e018d27 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -202,11 +202,16 @@ static void dw_pci_bottom_ack(struct irq_data *d) { struct msi_desc *msi = irq_data_get_msi_desc(d); struct pcie_port *pp; + unsigned long flags; pp = msi_desc_to_pci_sysdata(msi); + raw_spin_lock_irqsave(&pp->lock, flags); + if (pp->ops->msi_irq_ack) pp->ops->msi_irq_ack(d->hwirq, pp); + + raw_spin_unlock_irqrestore(&pp->lock, flags); } static struct irq_chip dw_pci_msi_bottom_irq_chip = { From 857af87dcdce1191219e41ba1fcf5e7c19ff5e2a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Nov 2018 22:57:34 +0000 Subject: [PATCH 37/58] PCI: dwc: Move interrupt acking into the proper callback commit 3f7bb2ec20ce07c02b2002349d256c91a463fcc5 upstream. The write to the status register is really an ACK for the HW, and should be treated as such by the driver. Let's move it to the irq_ack() callback, which will prevent people from moving it around in order to paper over other bugs. Fixes: 8c934095fa2f ("PCI: dwc: Clear MSI interrupt status after it is handled, not before") Fixes: 7c5925afbc58 ("PCI: dwc: Move MSI IRQs allocation to IRQ domains hierarchical API") Link: https://lore.kernel.org/linux-pci/20181113225734.8026-1-marc.zyngier@arm.com/ Reported-by: Trent Piepho Tested-by: Niklas Cassel Tested-by: Gustavo Pimentel Tested-by: Stanimir Varbanov Signed-off-by: Marc Zyngier [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-designware-host.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 610a5e018d27..0fa9e8fdce66 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -99,9 +99,6 @@ irqreturn_t dw_handle_msi_irq(struct pcie_port *pp) (i * MAX_MSI_IRQS_PER_CTRL) + pos); generic_handle_irq(irq); - dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_STATUS + - (i * MSI_REG_CTRL_BLOCK_SIZE), - 4, 1 << pos); pos++; } } @@ -200,14 +197,18 @@ static void dw_pci_bottom_unmask(struct irq_data *data) static void dw_pci_bottom_ack(struct irq_data *d) { - struct msi_desc *msi = irq_data_get_msi_desc(d); - struct pcie_port *pp; + struct pcie_port *pp = irq_data_get_irq_chip_data(d); + unsigned int res, bit, ctrl; unsigned long flags; - pp = msi_desc_to_pci_sysdata(msi); + ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL; + res = ctrl * MSI_REG_CTRL_BLOCK_SIZE; + bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL; raw_spin_lock_irqsave(&pp->lock, flags); + dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_STATUS + res, 4, 1 << bit); + if (pp->ops->msi_irq_ack) pp->ops->msi_irq_ack(d->hwirq, pp); From b7c3696f68ad0e5365e42b1b9a1200b363c1f45b Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Mon, 17 Dec 2018 10:32:22 -0500 Subject: [PATCH 38/58] drm/amd/display: Fix MST dp_blank REG_WAIT timeout commit 8c9d90eebd23b6d40ddf4ce5df5ca2b932336a06 upstream. Need to blank stream before deallocate MST payload. [drm:generic_reg_wait [amdgpu]] *ERROR* REG_WAIT timeout 10us * 3000 tries - dce110_stream_encoder_dp_blank line:944 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 2201 at /var/lib/dkms/amdgpu/18.50-690240/build/amd/amdgpu/../display/dc/dc_helper.c:249 generic_reg_wait+0xe7/0x160 [amdgpu] Call Trace: dce110_stream_encoder_dp_blank+0x11c/0x180 [amdgpu] core_link_disable_stream+0x40/0x230 [amdgpu] ? generic_reg_update_ex+0xdb/0x130 [amdgpu] dce110_reset_hw_ctx_wrap+0xb7/0x1f0 [amdgpu] dce110_apply_ctx_to_hw+0x30/0x430 [amdgpu] ? dce110_apply_ctx_for_surface+0x206/0x260 [amdgpu] dc_commit_state+0x2ba/0x4d0 [amdgpu] amdgpu_dm_atomic_commit_tail+0x297/0xd70 [amdgpu] ? amdgpu_bo_pin_restricted+0x58/0x260 [amdgpu] ? wait_for_completion_timeout+0x1f/0x120 ? wait_for_completion_interruptible+0x1c/0x160 commit_tail+0x3d/0x60 [drm_kms_helper] drm_atomic_helper_commit+0xf6/0x100 [drm_kms_helper] drm_atomic_connector_commit_dpms+0xe5/0xf0 [drm] drm_mode_obj_set_property_ioctl+0x14f/0x250 [drm] drm_mode_connector_property_set_ioctl+0x2e/0x40 [drm] drm_ioctl+0x1e0/0x430 [drm] ? drm_mode_connector_set_obj_prop+0x70/0x70 [drm] ? ep_read_events_proc+0xb0/0xb0 ? ep_scan_ready_list.constprop.18+0x1e6/0x1f0 ? timerqueue_add+0x52/0x80 amdgpu_drm_ioctl+0x49/0x80 [amdgpu] do_vfs_ioctl+0x90/0x5f0 SyS_ioctl+0x74/0x80 do_syscall_64+0x74/0x140 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ---[ end trace 3ed7b77a97d60f72 ]--- Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Hersen Wu Acked-by: Harry Wentland Acked-by: Alex Deucher Tested-by: Lyude Paul Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index fced3c1c2ef5..7c89785fd731 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2457,11 +2457,11 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; + core_dc->hwss.blank_stream(pipe_ctx); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - core_dc->hwss.blank_stream(pipe_ctx); - core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->sink->link, pipe_ctx->stream->signal); From 7398668b31108214179f74311e263b4eab275d2e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 28 Sep 2018 14:05:55 +0200 Subject: [PATCH 39/58] drm/fb_helper: Allow leaking fbdev smem_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4be9bd10e22dfc7fc101c5cf5969ef2d3a042d8a upstream. Since "drm/fb: Stop leaking physical address", the default behaviour of the DRM fbdev emulation is to set the smem_base to 0 and pass the new FBINFO_HIDE_SMEM_START flag. The main reason is to avoid leaking physical addresse to user-space, and it follows a general move over the kernel code to avoid user-space to manipulate physical addresses and then use some other mechanisms like dma-buf to transfer physical buffer handles over multiple subsystems. But, a lot of devices depends on closed sources binaries to enable OpenGL hardware acceleration that uses this smem_start value to pass physical addresses to out-of-tree modules in order to render into these physical adresses. These should use dma-buf buffers allocated from the DRM display device instead and stop relying on fbdev overallocation to gather DMA memory (some HW vendors delivers GBM and Wayland capable binaries, but older unsupported devices won't have these new binaries and are doomed until an Open Source solution like Lima finalizes). Since these devices heavily depends on this kind of software and because the smem_start population was available for years, it's a breakage to stop leaking smem_start without any alternative solutions. This patch adds a Kconfig depending on the EXPERT config and an unsafe kernel module parameter tainting the kernel when enabled. A clear comment and Kconfig help text was added to clarify why and when this patch should be reverted, but in the meantime it's a necessary feature to keep. Cc: Dave Airlie Cc: Daniel Vetter Cc: Bartlomiej Zolnierkiewicz Cc: Noralf Trønnes Cc: Maxime Ripard Cc: Eric Anholt Cc: Lucas Stach Cc: Rob Clark Cc: Ben Skeggs Cc: Christian König Signed-off-by: Neil Armstrong Reviewed-by: Maxime Ripard Tested-by: Maxime Ripard Acked-by: Daniel Vetter Acked-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1538136355-15383-1-git-send-email-narmstrong@baylibre.com Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/Kconfig | 20 ++++++++++++++++++++ drivers/gpu/drm/drm_fb_helper.c | 25 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index cb88528e7b10..e44e567bd789 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -110,6 +110,26 @@ config DRM_FBDEV_OVERALLOC is 100. Typical values for double buffering will be 200, triple buffering 300. +config DRM_FBDEV_LEAK_PHYS_SMEM + bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)" + depends on DRM_FBDEV_EMULATION && EXPERT + default n + help + In order to keep user-space compatibility, we want in certain + use-cases to keep leaking the fbdev physical address to the + user-space program handling the fbdev buffer. + This affects, not only, Amlogic, Allwinner or Rockchip devices + with ARM Mali GPUs using an userspace Blob. + This option is not supported by upstream developers and should be + removed as soon as possible and be considered as a broken and + legacy behaviour from a modern fbdev device driver. + + Please send any bug reports when using this to your proprietary + software vendor that requires this. + + If in doubt, say "N" or spread the word to your closed source + library vendor. + config DRM_LOAD_EDID_FIRMWARE bool "Allow to specify an EDID data set instead of probing for it" depends on DRM diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 9214c8b02484..11174b876b70 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -56,6 +56,25 @@ MODULE_PARM_DESC(drm_fbdev_overalloc, "Overallocation of the fbdev buffer (%) [default=" __MODULE_STRING(CONFIG_DRM_FBDEV_OVERALLOC) "]"); +/* + * In order to keep user-space compatibility, we want in certain use-cases + * to keep leaking the fbdev physical address to the user-space program + * handling the fbdev buffer. + * This is a bad habit essentially kept into closed source opengl driver + * that should really be moved into open-source upstream projects instead + * of using legacy physical addresses in user space to communicate with + * other out-of-tree kernel modules. + * + * This module_param *should* be removed as soon as possible and be + * considered as a broken and legacy behaviour from a modern fbdev device. + */ +#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) +static bool drm_leak_fbdev_smem = false; +module_param_unsafe(drm_leak_fbdev_smem, bool, 0600); +MODULE_PARM_DESC(fbdev_emulation, + "Allow unsafe leaking fbdev physical smem address [default=false]"); +#endif + static LIST_HEAD(kernel_fb_helper_list); static DEFINE_MUTEX(kernel_fb_helper_lock); @@ -3041,6 +3060,12 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->screen_size = fb->height * fb->pitches[0]; fbi->fix.smem_len = fbi->screen_size; fbi->screen_buffer = buffer->vaddr; + /* Shamelessly leak the physical address to user-space */ +#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) + if (drm_leak_fbdev_smem && fbi->fix.smem_start == 0) + fbi->fix.smem_start = + page_to_phys(virt_to_page(fbi->screen_buffer)); +#endif strcpy(fbi->fix.id, "DRM emulated"); drm_fb_helper_fill_fix(fbi, fb->pitches[0], fb->format->depth); From f57bef95d6f83830af86f8b0e73ff9197f77c3a7 Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Tue, 8 Jan 2019 12:23:52 +0500 Subject: [PATCH 40/58] drm/fb-helper: Partially bring back workaround for bugs of SDL 1.2 commit 62d85b3bf9d978ed4b6b2aeef5cf0ccf1423906e upstream. SDL 1.2 sets all fields related to the pixel format to zero in some cases[1]. Prior to commit db05c48197759 ("drm: fb-helper: Reject all pixel format changing requests"), there was an unintentional workaround for this that existed for more than a decade. First in device-specific DRM drivers, then here in drm_fb_helper.c. Previous code containing this workaround just ignores pixel format fields from userspace code. Not a good thing either, as this way, driver may silently use pixel format different from what client actually requested, and this in turn will lead to displaying garbage on the screen. I think that returning EINVAL to userspace in this particular case is the right option, so I decided to left code from problematic commit untouched instead of just reverting it entirely. Here is the steps required to reproduce this problem exactly: 1) Compile fceux[2] with SDL 1.2.15 and without GTK or OpenGL support. SDL should be compiled with fbdev support (which is on by default). 2) Create /etc/fb.modes with following contents (values seems not used, and just required to trigger problematic code in SDL): mode "test" geometry 1 1 1 1 1 timings 1 1 1 1 1 1 1 endmode 3) Create ~/.fceux/fceux.cfg with following contents: SDL.Hotkeys.Quit = 27 SDL.DoubleBuffering = 1 4) Ensure that screen resolution is at least 1280x960 (e.g. append "video=Virtual-1:1280x960-32" to the kernel cmdline for qemu/QXL). 5) Try to run fceux on VT with some ROM file[3]: # ./fceux color_test.nes [1] SDL 1.2.15 source code, src/video/fbcon/SDL_fbvideo.c, FB_SetVideoMode() [2] http://www.fceux.com [3] Example ROM: https://github.com/bokuweb/rustynes/blob/master/roms/color_test.nes Reported-by: saahriktu Suggested-by: saahriktu Cc: stable@vger.kernel.org Fixes: db05c48197759 ("drm: fb-helper: Reject all pixel format changing requests") Signed-off-by: Ivan Mironov [danvet: Delete misleading comment.] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190108072353.28078-2-mironov.ivan@gmail.com Link: https://patchwork.freedesktop.org/patch/msgid/20190108072353.28078-2-mironov.ivan@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 126 ++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 11174b876b70..b5b9f15549c2 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, var_1->transp.msb_right == var_2->transp.msb_right; } +static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var, + u8 depth) +{ + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; /* 8bit DAC */ + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + break; + case 15: + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.offset = 15; + var->transp.length = 1; + break; + case 16: + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 24; + var->transp.length = 8; + break; + default: + break; + } +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1650,6 +1708,20 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } + /* + * Workaround for SDL 1.2, which is known to be setting all pixel format + * fields values to zero in some cases. We treat this situation as a + * kind of "use some reasonable autodetected values". + */ + if (!var->red.offset && !var->green.offset && + !var->blue.offset && !var->transp.offset && + !var->red.length && !var->green.length && + !var->blue.length && !var->transp.length && + !var->red.msb_right && !var->green.msb_right && + !var->blue.msb_right && !var->transp.msb_right) { + drm_fb_helper_fill_pixel_fmt(var, fb->format->depth); + } + /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. @@ -1961,59 +2033,7 @@ void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helpe info->var.yoffset = 0; info->var.activate = FB_ACTIVATE_NOW; - switch (fb->format->depth) { - case 8: - info->var.red.offset = 0; - info->var.green.offset = 0; - info->var.blue.offset = 0; - info->var.red.length = 8; /* 8bit DAC */ - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; - default: - break; - } + drm_fb_helper_fill_pixel_fmt(&info->var, fb->format->depth); info->var.xres = fb_width; info->var.yres = fb_height; From d1a5113cf41b1314b741b0385d69713a2c270824 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 22 Dec 2018 03:06:23 +0000 Subject: [PATCH 41/58] drm/i915: Unwind failure on pinning the gen7 ppgtt commit 280d479b310298dfeb1d6f9a1617eca37beb6ce4 upstream. If we fail to pin the ggtt vma slot for the ppgtt page tables, we need to unwind the locals before reporting the error. Or else on subsequent attempts to bind the page tables into the ggtt, we will already believe that the vma has been pinned and continue on blithely. If something else should happen to be at that location, choas ensues. Fixes: a2bbf7148342 ("drm/i915/gtt: Only keep gen6 page directories pinned while active") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Cc: # v4.19+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20181222030623.21710-1-chris@chris-wilson.co.uk (cherry picked from commit d4de753526f4d99f541f1b6ed1d963005c09700c) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5f57f4e1fbc8..87411a5aba77 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2128,6 +2128,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) int gen6_ppgtt_pin(struct i915_hw_ppgtt *base) { struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base); + int err; /* * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt @@ -2143,9 +2144,17 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base) * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. */ - return i915_vma_pin(ppgtt->vma, - 0, GEN6_PD_ALIGN, - PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(ppgtt->vma, + 0, GEN6_PD_ALIGN, + PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + return 0; + +unpin: + ppgtt->pin_count = 0; + return err; } void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base) From f05d02b3b4c8f50393373efa05ab31878fb7635b Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 8 Jan 2019 16:11:27 -0500 Subject: [PATCH 42/58] drm/amdgpu: Don't ignore rc from drm_dp_mst_topology_mgr_resume() commit fe7553bef8d676d1d8b40666868b33ec39b9df5d upstream. drm_dp_mst_topology_mgr_resume() returns whether or not it managed to find the topology in question after a suspend resume cycle, and the driver is supposed to check this value and disable MST accordingly if it's gone-in addition to sending a hotplug in order to notify userspace that something changed during suspend. Currently, amdgpu just makes the mistake of ignoring the return code from drm_dp_mst_topology_mgr_resume() which means that if a topology was removed in suspend, amdgpu never notices and assumes it's still connected which leads to all sorts of problems. So, fix this by actually checking the rc from drm_dp_mst_topology_mgr_resume(). Also, reformat the rest of the function while we're at it to fix the over-indenting. Signed-off-by: Lyude Paul Reviewed-by: Harry Wentland Cc: Jerry Zuo Cc: # v4.15+ Link: https://patchwork.freedesktop.org/patch/msgid/20190108211133.32564-2-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d792735f1365..58af2ef73b76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -565,22 +565,36 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; + struct drm_dp_mst_topology_mgr *mgr; + int ret; + bool need_hotplug = false; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - aconnector = to_amdgpu_dm_connector(connector); - if (aconnector->dc_link->type == dc_connection_mst_branch && - !aconnector->mst_port) { + list_for_each_entry(connector, &dev->mode_config.connector_list, + head) { + aconnector = to_amdgpu_dm_connector(connector); + if (aconnector->dc_link->type != dc_connection_mst_branch || + aconnector->mst_port) + continue; - if (suspend) - drm_dp_mst_topology_mgr_suspend(&aconnector->mst_mgr); - else - drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr); - } + mgr = &aconnector->mst_mgr; + + if (suspend) { + drm_dp_mst_topology_mgr_suspend(mgr); + } else { + ret = drm_dp_mst_topology_mgr_resume(mgr); + if (ret < 0) { + drm_dp_mst_topology_mgr_set_mst(mgr, false); + need_hotplug = true; + } + } } drm_modeset_unlock(&dev->mode_config.connection_mutex); + + if (need_hotplug) + drm_kms_helper_hotplug_event(dev); } static int dm_hw_init(void *handle) From c7ca8e94dbb5cf81de178d8658d7861b1cf15444 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 8 Jan 2019 16:11:28 -0500 Subject: [PATCH 43/58] drm/amdgpu: Don't fail resume process if resuming atomic state fails commit 2d1af6a11cb9d88e0e3dd10258904c437fe1b315 upstream. This is an ugly one unfortunately. Currently, all DRM drivers supporting atomic modesetting will save the state that userspace had set before suspending, then attempt to restore that state on resume. This probably worked very well at one point, like many other things, until DP MST came into the picture. While it's easy to restore state on normal display connectors that were disconnected during suspend regardless of their state post-resume, this can't really be done with MST because of the fact that setting up a downstream sink requires performing sideband transactions between the source and the MST hub, sending out the ACT packets, etc. Because of this, there isn't really a guarantee that we can restore the atomic state we had before suspend once we've resumed. This sucks pretty bad, but so far I haven't run into any compositors that this actually causes serious issues with. Most compositors will notice the hotplug we send afterwards, and then reprobe state. Since nouveau and i915 also don't fail the suspend/resume process due to failing to restore the atomic state, let's make amdgpu match this behavior. Better to resume the GPU properly, then to stop the process half way because of a potentially unavoidable atomic commit failure. Eventually, we'll have a real fix for this problem on the DRM level. But we've got some more important low-hanging fruit to deal with first. Signed-off-by: Lyude Paul Reviewed-by: Harry Wentland Cc: Jerry Zuo Cc: # v4.15+ Link: https://patchwork.freedesktop.org/patch/msgid/20190108211133.32564-3-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 58af2ef73b76..a851bb07443f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -750,7 +750,6 @@ static int dm_resume(void *handle) struct drm_plane_state *new_plane_state; struct dm_plane_state *dm_new_plane_state; enum dc_connection_type new_connection_type = dc_connection_none; - int ret; int i; /* power on hardware */ @@ -823,13 +822,13 @@ static int dm_resume(void *handle) } } - ret = drm_atomic_helper_resume(ddev, dm->cached_state); + drm_atomic_helper_resume(ddev, dm->cached_state); dm->cached_state = NULL; amdgpu_dm_irq_resume_late(adev); - return ret; + return 0; } static const struct amd_ip_funcs amdgpu_dm_funcs = { From 997255351a29c1a5c8ad0456877c587301f0dc51 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 8 Jan 2019 19:47:38 +0100 Subject: [PATCH 44/58] rbd: don't return 0 on unmap if RBD_DEV_FLAG_REMOVING is set commit 85f5a4d666fd9be73856ed16bb36c5af5b406b29 upstream. There is a window between when RBD_DEV_FLAG_REMOVING is set and when the device is removed from rbd_dev_list. During this window, we set "already" and return 0. Returning 0 from write(2) can confuse userspace tools because 0 indicates that nothing was written. In particular, "rbd unmap" will retry the write multiple times a second: 10:28:05.463299 write(4, "0", 1) = 0 10:28:05.463509 write(4, "0", 1) = 0 10:28:05.463720 write(4, "0", 1) = 0 10:28:05.463942 write(4, "0", 1) = 0 10:28:05.464155 write(4, "0", 1) = 0 Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Tested-by: Dongsheng Yang Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 73ed5f3a862d..585378bc988c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5982,7 +5982,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus, struct list_head *tmp; int dev_id; char opt_buf[6]; - bool already = false; bool force = false; int ret; @@ -6015,13 +6014,13 @@ static ssize_t do_rbd_remove(struct bus_type *bus, spin_lock_irq(&rbd_dev->lock); if (rbd_dev->open_count && !force) ret = -EBUSY; - else - already = test_and_set_bit(RBD_DEV_FLAG_REMOVING, - &rbd_dev->flags); + else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING, + &rbd_dev->flags)) + ret = -EINPROGRESS; spin_unlock_irq(&rbd_dev->lock); } spin_unlock(&rbd_dev_list_lock); - if (ret < 0 || already) + if (ret) return ret; if (force) { From 7c2ea25e1364d9cdcbc7c44a92dccaf86e799f3a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 24 Dec 2018 20:27:08 -0500 Subject: [PATCH 45/58] ext4: make sure enough credits are reserved for dioread_nolock writes commit 812c0cab2c0dfad977605dbadf9148490ca5d93f upstream. There are enough credits reserved for most dioread_nolock writes; however, if the extent tree is sufficiently deep, and/or quota is enabled, the code was not allowing for all eventualities when reserving journal credits for the unwritten extent conversion. This problem can be seen using xfstests ext4/034: WARNING: CPU: 1 PID: 257 at fs/ext4/ext4_jbd2.c:271 __ext4_handle_dirty_metadata+0x10c/0x180 Workqueue: ext4-rsv-conversion ext4_end_io_rsv_work RIP: 0010:__ext4_handle_dirty_metadata+0x10c/0x180 ... EXT4-fs: ext4_free_blocks:4938: aborting transaction: error 28 in __ext4_handle_dirty_metadata EXT4: jbd2_journal_dirty_metadata failed: handle type 11 started at line 4921, credits 4/0, errcode -28 EXT4-fs error (device dm-1) in ext4_free_blocks:4950: error 28 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 36abbdafb26e..0a5388347fca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2748,7 +2748,8 @@ static int ext4_writepages(struct address_space *mapping, * We may need to convert up to one extent per block in * the page and we may dirty the inode. */ - rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits); + rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, + PAGE_SIZE >> inode->i_blkbits); } /* From 926cdac10439eda9ab7a11fd5f28eda203ccbb26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 25 Dec 2018 00:56:33 -0500 Subject: [PATCH 46/58] ext4: fix a potential fiemap/page fault deadlock w/ inline_data commit 2b08b1f12cd664dc7d5c84ead9ff25ae97ad5491 upstream. The ext4_inline_data_fiemap() function calls fiemap_fill_next_extent() while still holding the xattr semaphore. This is not necessary and it triggers a circular lockdep warning. This is because fiemap_fill_next_extent() could trigger a page fault when it writes into page which triggers a page fault. If that page is mmaped from the inline file in question, this could very well result in a deadlock. This problem can be reproduced using generic/519 with a file system configuration which has the inline_data feature enabled. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 27373d88b5f0..56f6e1782d5f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1890,12 +1890,12 @@ int ext4_inline_data_fiemap(struct inode *inode, physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; physical += offsetof(struct ext4_inode, i_block); - if (physical) - error = fiemap_fill_next_extent(fieinfo, start, physical, - inline_len, flags); brelse(iloc.bh); out: up_read(&EXT4_I(inode)->xattr_sem); + if (physical) + error = fiemap_fill_next_extent(fieinfo, start, physical, + inline_len, flags); return (error < 0 ? error : 0); } From 01db6e5cf81f905028de903f2290c10172226043 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Dec 2018 23:20:39 -0500 Subject: [PATCH 47/58] ext4: avoid kernel warning when writing the superblock to a dead device commit e86807862e6880809f191c4cea7f88a489f0ed34 upstream. The xfstests generic/475 test switches the underlying device with dm-error while running a stress test. This results in a large number of file system errors, and since we can't lock the buffer head when marking the superblock dirty in the ext4_grp_locked_error() case, it's possible the superblock to be !buffer_uptodate() without buffer_write_io_error() being true. We need to set buffer_uptodate() before we call mark_buffer_dirty() or this will trigger a WARN_ON. It's safe to do this since the superblock must have been properly read into memory or the mount would have been successful. So if buffer_uptodate() is not set, we can safely assume that this happened due to a failed attempt to write the superblock. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ee0f30852835..a1cf7d68b4f0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4904,7 +4904,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) ext4_superblock_csum_set(sb); if (sync) lock_buffer(sbh); - if (buffer_write_io_error(sbh)) { + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the * superblock failed. This could happen because the From da38a1b47b02b01f96976bbf75466bc1ae519d76 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Dec 2018 00:10:48 -0500 Subject: [PATCH 48/58] ext4: use ext4_write_inode() when fsyncing w/o a journal commit ad211f3e94b314a910d4af03178a0b52a7d1ee0a upstream. In no-journal mode, we previously used __generic_file_fsync() in no-journal mode. This triggers a lockdep warning, and in addition, it's not safe to depend on the inode writeback mechanism in the case ext4. We can solve both problems by calling ext4_write_inode() directly. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fsync.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 26a7fe5c4fd3..87a7ff00ef62 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -116,8 +116,16 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; + if (!journal) { - ret = __generic_file_fsync(file, start, end, datasync); + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL + }; + + ret = ext4_write_inode(inode, &wbc); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -125,9 +133,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. From bb80ad0dc3924f193eaa752e9da2e9384f58e627 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Dec 2018 00:11:07 -0500 Subject: [PATCH 49/58] ext4: track writeback errors using the generic tracking infrastructure commit 95cb67138746451cc84cf8e516e14989746e93b0 upstream. We already using mapping_set_error() in fs/ext4/page_io.c, so all we need to do is to use file_check_and_advance_wb_err() when handling fsync() requests in ext4_sync_file(). Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fsync.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 87a7ff00ef62..712f00995390 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -164,6 +164,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = err; } out: + err = file_check_and_advance_wb_err(file); + if (ret == 0) + ret = err; trace_ext4_sync_file_exit(inode, ret); return ret; } From 5dc41af3d19e94253c95f62e8ac01158dd8df078 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Dec 2018 22:34:31 -0500 Subject: [PATCH 50/58] ext4: fix special inode number checks in __ext4_iget() commit 191ce17876c9367819c4b0a25b503c0f6d9054d8 upstream. The check for special (reserved) inode number checks in __ext4_iget() was broken by commit 8a363970d1dc: ("ext4: avoid declaring fs inconsistent due to invalid file handles"). This was caused by a botched reversal of the sense of the flag now known as EXT4_IGET_SPECIAL (when it was previously named EXT4_IGET_NORMAL). Fix the logic appropriately. Fixes: 8a363970d1dc ("ext4: avoid declaring fs inconsistent...") Signed-off-by: Theodore Ts'o Reported-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0a5388347fca..2c43c5b92229 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4803,7 +4803,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, gid_t i_gid; projid_t i_projid; - if (((flags & EXT4_IGET_NORMAL) && + if ((!(flags & EXT4_IGET_SPECIAL) && (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) || (ino < EXT4_ROOT_INO) || (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { From 160f79c0a03996db3a3334ddd0ee21ce94973a79 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Tue, 8 Jan 2019 15:23:28 -0800 Subject: [PATCH 51/58] mm: page_mapped: don't assume compound page is huge or THP commit 8ab88c7169b7fba98812ead6524b9d05bc76cf00 upstream. LTP proc01 testcase has been observed to rarely trigger crashes on arm64: page_mapped+0x78/0xb4 stable_page_flags+0x27c/0x338 kpageflags_read+0xfc/0x164 proc_reg_read+0x7c/0xb8 __vfs_read+0x58/0x178 vfs_read+0x90/0x14c SyS_read+0x60/0xc0 The issue is that page_mapped() assumes that if compound page is not huge, then it must be THP. But if this is 'normal' compound page (COMPOUND_PAGE_DTOR), then following loop can keep running (for HPAGE_PMD_NR iterations) until it tries to read from memory that isn't mapped and triggers a panic: for (i = 0; i < hpage_nr_pages(page); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; } I could replicate this on x86 (v4.20-rc4-98-g60b548237fed) only with a custom kernel module [1] which: - allocates compound page (PAGEC) of order 1 - allocates 2 normal pages (COPY), which are initialized to 0xff (to satisfy _mapcount >= 0) - 2 PAGEC page structs are copied to address of first COPY page - second page of COPY is marked as not present - call to page_mapped(COPY) now triggers fault on access to 2nd COPY page at offset 0x30 (_mapcount) [1] https://github.com/jstancek/reproducers/blob/master/kernel/page_mapped_crash/repro.c Fix the loop to iterate for "1 << compound_order" pages. Kirrill said "IIRC, sound subsystem can producuce custom mapped compound pages". Link: http://lkml.kernel.org/r/c440d69879e34209feba21e12d236d06bc0a25db.1543577156.git.jstancek@redhat.com Fixes: e1534ae95004 ("mm: differentiate page_mapped() from page_mapcount() for compound pages") Signed-off-by: Jan Stancek Debugged-by: Laszlo Ersek Suggested-by: "Kirill A. Shutemov" Acked-by: Michal Hocko Acked-by: Kirill A. Shutemov Reviewed-by: David Hildenbrand Reviewed-by: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index 9e3ebd2ef65f..6a24a1025d77 100644 --- a/mm/util.c +++ b/mm/util.c @@ -485,7 +485,7 @@ bool page_mapped(struct page *page) return true; if (PageHuge(page)) return false; - for (i = 0; i < hpage_nr_pages(page); i++) { + for (i = 0; i < (1 << compound_order(page)); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; } From 44e7bab39f877c9c095bfaaee943b0807574a7f7 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 24 Dec 2018 14:44:52 +0300 Subject: [PATCH 52/58] sunrpc: use-after-free in svc_process_common() commit d4b09acf924b84bae77cad090a9d108e70b43643 upstream. if node have NFSv41+ mounts inside several net namespaces it can lead to use-after-free in svc_process_common() svc_process_common() /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<< HERE svc_process_common() can use incorrect rqstp->rq_xprt, its caller function bc_svc_process() takes it from serv->sv_bc_xprt. The problem is that serv is global structure but sv_bc_xprt is assigned per-netnamespace. According to Trond, the whole "let's set up rqstp->rq_xprt for the back channel" is nothing but a giant hack in order to work around the fact that svc_process_common() uses it to find the xpt_ops, and perform a couple of (meaningless for the back channel) tests of xpt_flags. All we really need in svc_process_common() is to be able to run rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr() Bruce J Fields points that this xpo_prep_reply_hdr() call is an awfully roundabout way just to do "svc_putnl(resv, 0);" in the tcp case. This patch does not initialiuze rqstp->rq_xprt in bc_svc_process(), now it calls svc_process_common() with rqstp->rq_xprt = NULL. To adjust reply header svc_process_common() just check rqstp->rq_prot and calls svc_tcp_prep_reply_hdr() for tcp case. To handle rqstp->rq_xprt = NULL case in functions called from svc_process_common() patch intruduces net namespace pointer svc_rqst->rq_bc_net and adjust SVC_NET() definition. Some other function was also adopted to properly handle described case. Signed-off-by: Vasily Averin Cc: stable@vger.kernel.org Fixes: 23c20ecd4475 ("NFS: callback up - users counting cleanup") Signed-off-by: J. Bruce Fields v2: added lost extern svc_tcp_prep_reply_hdr() Signed-off-by: Vasily Averin Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/svc.h | 5 ++++- include/trace/events/sunrpc.h | 6 ++++-- net/sunrpc/svc.c | 11 +++++++---- net/sunrpc/svc_xprt.c | 5 +++-- net/sunrpc/svcsock.c | 2 +- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 73e130a840ce..fdb6b317d974 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -295,9 +295,12 @@ struct svc_rqst { struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ spinlock_t rq_lock; /* per-request lock */ + struct net *rq_bc_net; /* pointer to backchannel's + * net namespace + */ }; -#define SVC_NET(svc_rqst) (svc_rqst->rq_xprt->xpt_net) +#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* * Rigorous type checking on sockaddr type conversions diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index bbb08a3ef5cc..a2644c494a9c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -582,7 +582,8 @@ TRACE_EVENT(svc_process, __field(u32, vers) __field(u32, proc) __string(service, name) - __string(addr, rqst->rq_xprt->xpt_remotebuf) + __string(addr, rqst->rq_xprt ? + rqst->rq_xprt->xpt_remotebuf : "(null)") ), TP_fast_assign( @@ -590,7 +591,8 @@ TRACE_EVENT(svc_process, __entry->vers = rqst->rq_vers; __entry->proc = rqst->rq_proc; __assign_str(service, name); - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); + __assign_str(addr, rqst->rq_xprt ? + rqst->rq_xprt->xpt_remotebuf : "(null)"); ), TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u", diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d13e05f1a990..d65f8d35de87 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1144,6 +1144,8 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif +extern void svc_tcp_prep_reply_hdr(struct svc_rqst *); + /* * Common routine for processing the RPC request. */ @@ -1172,7 +1174,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) clear_bit(RQ_DROPME, &rqstp->rq_flags); /* Setup reply header */ - rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); + if (rqstp->rq_prot == IPPROTO_TCP) + svc_tcp_prep_reply_hdr(rqstp); svc_putu32(resv, rqstp->rq_xid); @@ -1244,7 +1247,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) * for lower versions. RPC_PROG_MISMATCH seems to be the closest * fit. */ - if (versp->vs_need_cong_ctrl && + if (versp->vs_need_cong_ctrl && rqstp->rq_xprt && !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) goto err_bad_vers; @@ -1336,7 +1339,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) return 0; close: - if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) svc_close_xprt(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); return 0; @@ -1459,10 +1462,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ - rqstp->rq_xprt = serv->sv_bc_xprt; rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; rqstp->rq_server = serv; + rqstp->rq_bc_net = req->rq_xprt->xprt_net; rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 83ccd0221c98..6cf0fd37cbf0 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -469,10 +469,11 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) */ void svc_reserve(struct svc_rqst *rqstp, int space) { + struct svc_xprt *xprt = rqstp->rq_xprt; + space += rqstp->rq_res.head[0].iov_len; - if (space < rqstp->rq_reserved) { - struct svc_xprt *xprt = rqstp->rq_xprt; + if (xprt && space < rqstp->rq_reserved) { atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index fc1c0d9ef57d..97a8282955a8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1198,7 +1198,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) /* * Setup response header. TCP has a 4B record length field. */ -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) +void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) { struct kvec *resv = &rqstp->rq_res.head[0]; From 4f14f446d115f022492f574bb02cff287b91915d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 11 Dec 2018 13:23:57 +0100 Subject: [PATCH 53/58] KVM: arm/arm64: Fix VMID alloc race by reverting to lock-less commit fb544d1ca65a89f7a3895f7531221ceeed74ada7 upstream. We recently addressed a VMID generation race by introducing a read/write lock around accesses and updates to the vmid generation values. However, kvm_arch_vcpu_ioctl_run() also calls need_new_vmid_gen() but does so without taking the read lock. As far as I can tell, this can lead to the same kind of race: VM 0, VCPU 0 VM 0, VCPU 1 ------------ ------------ update_vttbr (vmid 254) update_vttbr (vmid 1) // roll over read_lock(kvm_vmid_lock); force_vm_exit() local_irq_disable need_new_vmid_gen == false //because vmid gen matches enter_guest (vmid 254) kvm_arch.vttbr = : read_unlock(kvm_vmid_lock); enter_guest (vmid 1) Which results in running two VCPUs in the same VM with different VMIDs and (even worse) other VCPUs from other VMs could now allocate clashing VMID 254 from the new generation as long as VCPU 0 is not exiting. Attempt to solve this by making sure vttbr is updated before another CPU can observe the updated VMID generation. Cc: stable@vger.kernel.org Fixes: f0cf47d939d0 "KVM: arm/arm64: Close VMID generation race" Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arm.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 8fb31a7cc22c..91495045ad5a 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -66,7 +66,7 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u32 kvm_next_vmid; static unsigned int kvm_vmid_bits __read_mostly; -static DEFINE_RWLOCK(kvm_vmid_lock); +static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; @@ -482,7 +482,9 @@ void force_vm_exit(const cpumask_t *mask) */ static bool need_new_vmid_gen(struct kvm *kvm) { - return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen)); + u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); + smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ + return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); } /** @@ -497,16 +499,11 @@ static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; u64 vmid; - bool new_gen; - read_lock(&kvm_vmid_lock); - new_gen = need_new_vmid_gen(kvm); - read_unlock(&kvm_vmid_lock); - - if (!new_gen) + if (!need_new_vmid_gen(kvm)) return; - write_lock(&kvm_vmid_lock); + spin_lock(&kvm_vmid_lock); /* * We need to re-check the vmid_gen here to ensure that if another vcpu @@ -514,7 +511,7 @@ static void update_vttbr(struct kvm *kvm) * use the same vmid. */ if (!need_new_vmid_gen(kvm)) { - write_unlock(&kvm_vmid_lock); + spin_unlock(&kvm_vmid_lock); return; } @@ -537,7 +534,6 @@ static void update_vttbr(struct kvm *kvm) kvm_call_hyp(__kvm_flush_vm_context); } - kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); kvm->arch.vmid = kvm_next_vmid; kvm_next_vmid++; kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; @@ -548,7 +544,10 @@ static void update_vttbr(struct kvm *kvm) vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; - write_unlock(&kvm_vmid_lock); + smp_wmb(); + WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); + + spin_unlock(&kvm_vmid_lock); } static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) From 6c9a2046297f7210498d47de9e348fe93270b099 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 3 Jan 2019 18:00:39 +0000 Subject: [PATCH 54/58] arm64: compat: Don't pull syscall number from regs in arm_compat_syscall commit 53290432145a8eb143fe29e06e9c1465d43dc723 upstream. The syscall number may have been changed by a tracer, so we should pass the actual number in from the caller instead of pulling it from the saved r7 value directly. Cc: Cc: Pi-Hsun Shih Reviewed-by: Dave Martin Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/sys_compat.c | 9 ++++----- arch/arm64/kernel/syscall.c | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 7be666062c7c..010212d35700 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -66,12 +66,11 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) /* * Handle all unrecognised system calls. */ -long compat_arm_syscall(struct pt_regs *regs) +long compat_arm_syscall(struct pt_regs *regs, int scno) { siginfo_t info; - unsigned int no = regs->regs[7]; - switch (no) { + switch (scno) { /* * Flush a region from virtual address 'r0' to virtual address 'r1' * _exclusive_. There is no alignment requirement on either address; @@ -107,7 +106,7 @@ long compat_arm_syscall(struct pt_regs *regs) * way the calling program can gracefully determine whether * a feature is supported. */ - if (no < __ARM_NR_COMPAT_END) + if (scno < __ARM_NR_COMPAT_END) return -ENOSYS; break; } @@ -119,6 +118,6 @@ long compat_arm_syscall(struct pt_regs *regs) info.si_addr = (void __user *)instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4); - arm64_notify_die("Oops - bad compat syscall(2)", regs, &info, no); + arm64_notify_die("Oops - bad compat syscall(2)", regs, &info, scno); return 0; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 032d22312881..5610ac01c1ec 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -13,16 +13,15 @@ #include #include -long compat_arm_syscall(struct pt_regs *regs); - +long compat_arm_syscall(struct pt_regs *regs, int scno); long sys_ni_syscall(void); -asmlinkage long do_ni_syscall(struct pt_regs *regs) +static long do_ni_syscall(struct pt_regs *regs, int scno) { #ifdef CONFIG_COMPAT long ret; if (is_compat_task()) { - ret = compat_arm_syscall(regs); + ret = compat_arm_syscall(regs, scno); if (ret != -ENOSYS) return ret; } @@ -47,7 +46,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)]; ret = __invoke_syscall(regs, syscall_fn); } else { - ret = do_ni_syscall(regs); + ret = do_ni_syscall(regs, scno); } regs->regs[0] = ret; From 829431a2a5a8495ee3a144de1be12e34dec45f91 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Nov 2018 09:48:12 +0000 Subject: [PATCH 55/58] Btrfs: fix access to available allocation bits when starting balance commit 5a8067c0d17feb7579db0476191417b441a8996e upstream. The available allocation bits members from struct btrfs_fs_info are protected by a sequence lock, and when starting balance we access them incorrectly in two different ways: 1) In the read sequence lock loop at btrfs_balance() we use the values we read from fs_info->avail_*_alloc_bits and we can immediately do actions that have side effects and can not be undone (printing a message and jumping to a label). This is wrong because a retry might be needed, so our actions must not have side effects and must be repeatable as long as read_seqretry() returns a non-zero value. In other words, we were essentially ignoring the sequence lock; 2) Right below the read sequence lock loop, we were reading the values from avail_metadata_alloc_bits and avail_data_alloc_bits without any protection from concurrent writers, that is, reading them outside of the read sequence lock critical section. So fix this by making sure we only read the available allocation bits while in a read sequence lock critical section and that what we do in the critical section is repeatable (has nothing that can not be undone) so that any eventual retry that is needed is handled properly. Fixes: de98ced9e743 ("Btrfs: use seqlock to protect fs_info->avail_{data, metadata, system}_alloc_bits") Fixes: 14506127979a ("btrfs: fix a bogus warning when converting only data or metadata") Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4405e430da6..223334f08530 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3712,6 +3712,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, int ret; u64 num_devices; unsigned seq; + bool reducing_integrity; if (btrfs_fs_closing(fs_info) || atomic_read(&fs_info->balance_pause_req) || @@ -3796,24 +3797,30 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, !(bctl->sys.target & allowed)) || ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && (fs_info->avail_metadata_alloc_bits & allowed) && - !(bctl->meta.target & allowed))) { - if (bctl->flags & BTRFS_BALANCE_FORCE) { - btrfs_info(fs_info, - "balance: force reducing metadata integrity"); - } else { - btrfs_err(fs_info, - "balance: reduces metadata integrity, use --force if you want this"); - ret = -EINVAL; - goto out; - } - } + !(bctl->meta.target & allowed))) + reducing_integrity = true; + else + reducing_integrity = false; + + /* if we're not converting, the target field is uninitialized */ + meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->meta.target : fs_info->avail_metadata_alloc_bits; + data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->data.target : fs_info->avail_data_alloc_bits; } while (read_seqretry(&fs_info->profiles_lock, seq)); - /* if we're not converting, the target field is uninitialized */ - meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? - bctl->meta.target : fs_info->avail_metadata_alloc_bits; - data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ? - bctl->data.target : fs_info->avail_data_alloc_bits; + if (reducing_integrity) { + if (bctl->flags & BTRFS_BALANCE_FORCE) { + btrfs_info(fs_info, + "balance: force reducing metadata integrity"); + } else { + btrfs_err(fs_info, + "balance: reduces metadata integrity, use --force if you want this"); + ret = -EINVAL; + goto out; + } + } + if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { int meta_index = btrfs_bg_flags_to_raid_index(meta_target); From 79aa5c0daa5c7a2c4de945f4964702f3fcb6f8a3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Nov 2018 14:15:36 +0000 Subject: [PATCH 56/58] Btrfs: fix deadlock when enabling quotas due to concurrent snapshot creation commit 9a6f209e36500efac51528132a3e3083586eda5f upstream. If the quota enable and snapshot creation ioctls are called concurrently we can get into a deadlock where the task enabling quotas will deadlock on the fs_info->qgroup_ioctl_lock mutex because it attempts to lock it twice, or the task creating a snapshot tries to commit the transaction while the task enabling quota waits for the former task to commit the transaction while holding the mutex. The following time diagrams show how both cases happen. First scenario: CPU 0 CPU 1 btrfs_ioctl() btrfs_ioctl_quota_ctl() btrfs_quota_enable() mutex_lock(fs_info->qgroup_ioctl_lock) btrfs_start_transaction() btrfs_ioctl() btrfs_ioctl_snap_create_v2 create_snapshot() --> adds snapshot to the list pending_snapshots of the current transaction btrfs_commit_transaction() create_pending_snapshots() create_pending_snapshot() qgroup_account_snapshot() btrfs_qgroup_inherit() mutex_lock(fs_info->qgroup_ioctl_lock) --> deadlock, mutex already locked by this task at btrfs_quota_enable() Second scenario: CPU 0 CPU 1 btrfs_ioctl() btrfs_ioctl_quota_ctl() btrfs_quota_enable() mutex_lock(fs_info->qgroup_ioctl_lock) btrfs_start_transaction() btrfs_ioctl() btrfs_ioctl_snap_create_v2 create_snapshot() --> adds snapshot to the list pending_snapshots of the current transaction btrfs_commit_transaction() --> waits for task at CPU 0 to release its transaction handle btrfs_commit_transaction() --> sees another task started the transaction commit first --> releases its transaction handle --> waits for the transaction commit to be completed by the task at CPU 1 create_pending_snapshot() qgroup_account_snapshot() btrfs_qgroup_inherit() mutex_lock(fs_info->qgroup_ioctl_lock) --> deadlock, task at CPU 0 has the mutex locked but it is waiting for us to finish the transaction commit So fix this by setting the quota enabled flag in fs_info after committing the transaction at btrfs_quota_enable(). This ends up serializing quota enable and snapshot creation as if the snapshot creation happened just before the quota enable request. The quota rescan task, scheduled after committing the transaction in btrfs_quote_enable(), will do the accounting. Fixes: 6426c7ad697d ("btrfs: qgroup: Fix qgroup accounting when creating snapshot") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ff434663d65b..e1fcb28ad4cc 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1013,16 +1013,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) btrfs_abort_transaction(trans, ret); goto out_free_path; } - spin_lock(&fs_info->qgroup_lock); - fs_info->quota_root = quota_root; - set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - spin_unlock(&fs_info->qgroup_lock); ret = btrfs_commit_transaction(trans); trans = NULL; if (ret) goto out_free_path; + /* + * Set quota enabled flag after committing the transaction, to avoid + * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot + * creation. + */ + spin_lock(&fs_info->qgroup_lock); + fs_info->quota_root = quota_root; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + spin_unlock(&fs_info->qgroup_lock); + ret = qgroup_rescan_init(fs_info, 0, 1); if (!ret) { qgroup_rescan_zero_tracking(fs_info); From 7a1b9b76bac76498657b9322164957c2a50c573b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 10 Dec 2018 17:53:35 +0000 Subject: [PATCH 57/58] Btrfs: use nofs context when initializing security xattrs to avoid deadlock commit 827aa18e7b903c5ff3b3cd8fec328a99b1dbd411 upstream. When initializing the security xattrs, we are holding a transaction handle therefore we need to use a GFP_NOFS context in order to avoid a deadlock with reclaim in case it's triggered. Fixes: 39a27ec1004e8 ("btrfs: use GFP_KERNEL for xattr and acl allocations") Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/xattr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index ea78c3d6dcfc..f141b45ce349 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" @@ -422,9 +423,15 @@ static int btrfs_initxattrs(struct inode *inode, { const struct xattr *xattr; struct btrfs_trans_handle *trans = fs_info; + unsigned int nofs_flag; char *name; int err = 0; + /* + * We're holding a transaction handle, so use a NOFS memory allocation + * context to avoid deadlock if reclaim happens. + */ + nofs_flag = memalloc_nofs_save(); for (xattr = xattr_array; xattr->name != NULL; xattr++) { name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(xattr->name) + 1, GFP_KERNEL); @@ -440,6 +447,7 @@ static int btrfs_initxattrs(struct inode *inode, if (err < 0) break; } + memalloc_nofs_restore(nofs_flag); return err; } From 9c5931b65a7b58ddeaf1530f1c4b515ba8640f8d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Jan 2019 22:04:38 +0100 Subject: [PATCH 58/58] Linux 4.19.16 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e30d48274fa..e8cb4875b86d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 15 +SUBLEVEL = 16 EXTRAVERSION = NAME = "People's Front"