fd0e786d9d
In the following commit:ce0fa3e56a
("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages") ... we added code to memory_failure() to unmap the page from the kernel 1:1 virtual address space to avoid speculative access to the page logging additional errors. But memory_failure() may not always succeed in taking the page offline, especially if the page belongs to the kernel. This can happen if there are too many corrected errors on a page and either mcelog(8) or drivers/ras/cec.c asks to take a page offline. Since we remove the 1:1 mapping early in memory_failure(), we can end up with the page unmapped, but still in use. On the next access the kernel crashes :-( There are also various debug paths that call memory_failure() to simulate occurrence of an error. Since there is no actual error in memory, we don't need to map out the page for those cases. Revert most of the previous attempt and keep the solution local to arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when: 1) there is a real error 2) memory_failure() succeeds. All of this only applies to 64-bit systems. 32-bit kernel doesn't map all of memory into kernel space. It isn't worth adding the code to unmap the piece that is mapped because nobody would run a 32-bit kernel on a machine that has recoverable machine checks. Signed-off-by: Tony Luck <tony.luck@intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave <dave.hansen@intel.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert (Persistent Memory) <elliott@hpe.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org #v4.14 Fixes:ce0fa3e56a
("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages") Signed-off-by: Ingo Molnar <mingo@kernel.org>
130 lines
3.4 KiB
C
130 lines
3.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef LINUX_MM_INLINE_H
|
|
#define LINUX_MM_INLINE_H
|
|
|
|
#include <linux/huge_mm.h>
|
|
#include <linux/swap.h>
|
|
|
|
/**
|
|
* page_is_file_cache - should the page be on a file LRU or anon LRU?
|
|
* @page: the page to test
|
|
*
|
|
* Returns 1 if @page is page cache page backed by a regular filesystem,
|
|
* or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
|
|
* Used by functions that manipulate the LRU lists, to sort a page
|
|
* onto the right LRU list.
|
|
*
|
|
* We would like to get this info without a page flag, but the state
|
|
* needs to survive until the page is last deleted from the LRU, which
|
|
* could be as far down as __page_cache_release.
|
|
*/
|
|
static inline int page_is_file_cache(struct page *page)
|
|
{
|
|
return !PageSwapBacked(page);
|
|
}
|
|
|
|
static __always_inline void __update_lru_size(struct lruvec *lruvec,
|
|
enum lru_list lru, enum zone_type zid,
|
|
int nr_pages)
|
|
{
|
|
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
|
|
|
__mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
|
|
__mod_zone_page_state(&pgdat->node_zones[zid],
|
|
NR_ZONE_LRU_BASE + lru, nr_pages);
|
|
}
|
|
|
|
static __always_inline void update_lru_size(struct lruvec *lruvec,
|
|
enum lru_list lru, enum zone_type zid,
|
|
int nr_pages)
|
|
{
|
|
__update_lru_size(lruvec, lru, zid, nr_pages);
|
|
#ifdef CONFIG_MEMCG
|
|
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
|
|
#endif
|
|
}
|
|
|
|
static __always_inline void add_page_to_lru_list(struct page *page,
|
|
struct lruvec *lruvec, enum lru_list lru)
|
|
{
|
|
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
|
|
list_add(&page->lru, &lruvec->lists[lru]);
|
|
}
|
|
|
|
static __always_inline void add_page_to_lru_list_tail(struct page *page,
|
|
struct lruvec *lruvec, enum lru_list lru)
|
|
{
|
|
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
|
|
list_add_tail(&page->lru, &lruvec->lists[lru]);
|
|
}
|
|
|
|
static __always_inline void del_page_from_lru_list(struct page *page,
|
|
struct lruvec *lruvec, enum lru_list lru)
|
|
{
|
|
list_del(&page->lru);
|
|
update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
|
|
}
|
|
|
|
/**
|
|
* page_lru_base_type - which LRU list type should a page be on?
|
|
* @page: the page to test
|
|
*
|
|
* Used for LRU list index arithmetic.
|
|
*
|
|
* Returns the base LRU type - file or anon - @page should be on.
|
|
*/
|
|
static inline enum lru_list page_lru_base_type(struct page *page)
|
|
{
|
|
if (page_is_file_cache(page))
|
|
return LRU_INACTIVE_FILE;
|
|
return LRU_INACTIVE_ANON;
|
|
}
|
|
|
|
/**
|
|
* page_off_lru - which LRU list was page on? clearing its lru flags.
|
|
* @page: the page to test
|
|
*
|
|
* Returns the LRU list a page was on, as an index into the array of LRU
|
|
* lists; and clears its Unevictable or Active flags, ready for freeing.
|
|
*/
|
|
static __always_inline enum lru_list page_off_lru(struct page *page)
|
|
{
|
|
enum lru_list lru;
|
|
|
|
if (PageUnevictable(page)) {
|
|
__ClearPageUnevictable(page);
|
|
lru = LRU_UNEVICTABLE;
|
|
} else {
|
|
lru = page_lru_base_type(page);
|
|
if (PageActive(page)) {
|
|
__ClearPageActive(page);
|
|
lru += LRU_ACTIVE;
|
|
}
|
|
}
|
|
return lru;
|
|
}
|
|
|
|
/**
|
|
* page_lru - which LRU list should a page be on?
|
|
* @page: the page to test
|
|
*
|
|
* Returns the LRU list a page should be on, as an index
|
|
* into the array of LRU lists.
|
|
*/
|
|
static __always_inline enum lru_list page_lru(struct page *page)
|
|
{
|
|
enum lru_list lru;
|
|
|
|
if (PageUnevictable(page))
|
|
lru = LRU_UNEVICTABLE;
|
|
else {
|
|
lru = page_lru_base_type(page);
|
|
if (PageActive(page))
|
|
lru += LRU_ACTIVE;
|
|
}
|
|
return lru;
|
|
}
|
|
|
|
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
|
|
|
|
#endif
|