| author | Lorenzo Stoakes <lorenzo.stoakes@oracle.com> | 2025-11-10 22:21:21 +0000 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-11-24 15:08:50 -0800 |
| commit | fb888710e26a8a8a37dc0f8ed09a3c908c63eb71 (patch) | |
| tree | b40879446963a28fd9c84cc9a31df225281783b6 /mm | |
| parent | 68aa2fdbf57f769e552f472ddb762aba028a207e (diff) | |
mm: avoid unnecessary uses of is_swap_pte()
There's an established convention in the kernel that we treat a PTE as
containing a swap entry (or one of the unfortunately named non-swap swap
entries) when it is neither empty (i.e. pte_none() evaluates true) nor
present (i.e. pte_present() evaluates true).
However, there is some inconsistency in how this is applied, as we also
have the is_swap_pte() helper which explicitly performs this check:
	/* check whether a pte points to a swap entry */
	static inline int is_swap_pte(pte_t pte)
	{
		return !pte_none(pte) && !pte_present(pte);
	}
As this represents a predicate, it would be logical to assume that, in
order to establish that a PTE can correctly be manipulated as a
swap/non-swap entry, this predicate must first be checked.
Instead, however, we far more often utilise the established convention of
checking pte_none() / pte_present() before operating on entries as if they
were swap/non-swap.
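For illustration, the convention looks like this in a typical page table
walk (a sketch only, not code taken from this patch):

	pte_t pte = ptep_get(ptep);

	if (pte_none(pte)) {
		/* Empty PTE - nothing is mapped here. */
	} else if (pte_present(pte)) {
		/* Present PTE - maps a physical page. */
	} else {
		/*
		 * Neither - the PTE contains a swap entry or a
		 * non-swap entry (migration entry, PTE marker, etc.).
		 */
	}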
This patch works towards correcting this inconsistency by removing all
uses of is_swap_pte() where we already perform pte_none()/pte_present()
checks anyway, or where it is otherwise clearly logical to do so.
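For instance, in migration_entry_wait() (see the mm/migrate.c hunk below),
the check

	if (!is_swap_pte(pte))
		goto out;

becomes the equivalent open-coded form:

	if (pte_none(pte) || pte_present(pte))
		goto out;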
We also take advantage of the fact that pte_swp_uffd_wp() is only set on
swap entries.
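For example, in mm/page_table_check.c (see the hunk below), the check

	else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
		WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));

can drop the is_swap_pte() call entirely: once the pte_present() case has
been handled, pte_swp_uffd_wp() evaluating true implies the PTE contains a
swap entry.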
Additionally, update comments that reference is_swap_pte() and
non_swap_entry().
No functional change intended.
Link: https://lkml.kernel.org/r/17fd6d7f46a846517fd455fadd640af47fcd7c55.1762812360.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mathew Brost <matthew.brost@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Wei Xu <weixugc@google.com>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/hugetlb.c | 6 |
| -rw-r--r-- | mm/internal.h | 6 |
| -rw-r--r-- | mm/khugepaged.c | 29 |
| -rw-r--r-- | mm/migrate.c | 2 |
| -rw-r--r-- | mm/mprotect.c | 43 |
| -rw-r--r-- | mm/mremap.c | 7 |
| -rw-r--r-- | mm/page_table_check.c | 13 |
| -rw-r--r-- | mm/page_vma_mapped.c | 31 |
8 files changed, 69 insertions, 68 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 12853cdefc9b..59d91c36770c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5092,13 +5092,13 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
 
 	pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
 
-	if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte))
+	if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) {
 		huge_pte_clear(mm, new_addr, dst_pte, sz);
-	else {
+	} else {
 		if (need_clear_uffd_wp) {
 			if (pte_present(pte))
 				pte = huge_pte_clear_uffd_wp(pte);
-			else if (is_swap_pte(pte))
+			else
 				pte = pte_swp_clear_uffd_wp(pte);
 		}
 		set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
diff --git a/mm/internal.h b/mm/internal.h
index 2bad3971813b..a9b38cadb192 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -325,8 +325,7 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
 /**
  * pte_move_swp_offset - Move the swap entry offset field of a swap pte
  *	 forward or backward by delta
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- *	 non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry
  * @delta: The direction and the offset we are moving; forward if delta
  *	 is positive; backward if delta is negative
  *
@@ -352,8 +351,7 @@ static inline pte_t pte_move_swp_offset(pte_t pte, long delta)
 
 /**
  * pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- *	 non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry.
  *
  * Increments the swap offset, while maintaining all other fields, including
  * swap type, and any swp pte bits. The resulting pte is returned.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index af1c162c9a94..d7e71c2e2571 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1019,7 +1019,8 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
 		}
 
 		vmf.orig_pte = ptep_get_lockless(pte);
-		if (!is_swap_pte(vmf.orig_pte))
+		if (pte_none(vmf.orig_pte) ||
+		    pte_present(vmf.orig_pte))
 			continue;
 
 		vmf.pte = pte;
@@ -1276,7 +1277,19 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 	for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
 	     _pte++, addr += PAGE_SIZE) {
 		pte_t pteval = ptep_get(_pte);
-		if (is_swap_pte(pteval)) {
+		if (pte_none_or_zero(pteval)) {
+			++none_or_zero;
+			if (!userfaultfd_armed(vma) &&
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
+				continue;
+			} else {
+				result = SCAN_EXCEED_NONE_PTE;
+				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+				goto out_unmap;
+			}
+		}
+		if (!pte_present(pteval)) {
 			++unmapped;
 			if (!cc->is_khugepaged ||
 			    unmapped <= khugepaged_max_ptes_swap) {
@@ -1296,18 +1309,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 				goto out_unmap;
 			}
 		}
-		if (pte_none_or_zero(pteval)) {
-			++none_or_zero;
-			if (!userfaultfd_armed(vma) &&
-			    (!cc->is_khugepaged ||
-			     none_or_zero <= khugepaged_max_ptes_none)) {
-				continue;
-			} else {
-				result = SCAN_EXCEED_NONE_PTE;
-				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
-				goto out_unmap;
-			}
-		}
 		if (pte_uffd_wp(pteval)) {
 			/*
 			 * Don't collapse the page if any of the small
diff --git a/mm/migrate.c b/mm/migrate.c
index d8f6cd14cdb7..847c1ec17628 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -492,7 +492,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	pte = ptep_get(ptep);
 	pte_unmap(ptep);
 
-	if (!is_swap_pte(pte))
+	if (pte_none(pte) || pte_present(pte))
 		goto out;
 
 	entry = pte_to_swp_entry(pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 918a64cc6033..aa555dfbdfc5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -297,7 +297,26 @@ static long change_pte_range(struct mmu_gather *tlb,
 			prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
 					       nr_ptes, /* idx = */ 0,
 					       /* set_write = */ false, tlb);
 			pages += nr_ptes;
-		} else if (is_swap_pte(oldpte)) {
+		} else if (pte_none(oldpte)) {
+			/*
+			 * Nobody plays with any none ptes besides
+			 * userfaultfd when applying the protections.
+			 */
+			if (likely(!uffd_wp))
+				continue;
+
+			if (userfaultfd_wp_use_markers(vma)) {
+				/*
+				 * For file-backed mem, we need to be able to
+				 * wr-protect a none pte, because even if the
+				 * pte is none, the page/swap cache could
+				 * exist. Doing that by install a marker.
+				 */
+				set_pte_at(vma->vm_mm, addr, pte,
+					   make_pte_marker(PTE_MARKER_UFFD_WP));
+				pages++;
+			}
+		} else {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 			pte_t newpte;
@@ -358,28 +377,6 @@ static long change_pte_range(struct mmu_gather *tlb,
 				set_pte_at(vma->vm_mm, addr, pte, newpte);
 				pages++;
 			}
-		} else {
-			/* It must be an none page, or what else?.. */
-			WARN_ON_ONCE(!pte_none(oldpte));
-
-			/*
-			 * Nobody plays with any none ptes besides
-			 * userfaultfd when applying the protections.
-			 */
-			if (likely(!uffd_wp))
-				continue;
-
-			if (userfaultfd_wp_use_markers(vma)) {
-				/*
-				 * For file-backed mem, we need to be able to
-				 * wr-protect a none pte, because even if the
-				 * pte is none, the page/swap cache could
-				 * exist. Doing that by install a marker.
-				 */
-				set_pte_at(vma->vm_mm, addr, pte,
-					   make_pte_marker(PTE_MARKER_UFFD_WP));
-				pages++;
-			}
 		}
 	} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
diff --git a/mm/mremap.c b/mm/mremap.c
index 7c21b2ad13f6..62b6827abacf 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -158,6 +158,9 @@ static void drop_rmap_locks(struct vm_area_struct *vma)
 
 static pte_t move_soft_dirty_pte(pte_t pte)
 {
+	if (pte_none(pte))
+		return pte;
+
 	/*
 	 * Set soft dirty bit so we can notice
 	 * in userspace the ptes were moved.
@@ -165,7 +168,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 #ifdef CONFIG_MEM_SOFT_DIRTY
 	if (pte_present(pte))
 		pte = pte_mksoft_dirty(pte);
-	else if (is_swap_pte(pte))
+	else
 		pte = pte_swp_mksoft_dirty(pte);
 #endif
 	return pte;
@@ -294,7 +297,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
 		if (need_clear_uffd_wp) {
 			if (pte_present(pte))
 				pte = pte_clear_uffd_wp(pte);
-			else if (is_swap_pte(pte))
+			else
 				pte = pte_swp_clear_uffd_wp(pte);
 		}
 		set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 4eeca782b888..43f75d2f7c36 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -185,12 +185,15 @@ static inline bool swap_cached_writable(swp_entry_t entry)
 	       is_writable_migration_entry(entry);
 }
 
-static inline void page_table_check_pte_flags(pte_t pte)
+static void page_table_check_pte_flags(pte_t pte)
 {
-	if (pte_present(pte) && pte_uffd_wp(pte))
-		WARN_ON_ONCE(pte_write(pte));
-	else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
-		WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
+	if (pte_present(pte)) {
+		WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte));
+	} else if (pte_swp_uffd_wp(pte)) {
+		const swp_entry_t entry = pte_to_swp_entry(pte);
+
+		WARN_ON_ONCE(swap_cached_writable(entry));
+	}
 }
 
 void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index be20468fb5a9..a4e23818f37f 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
 static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 		    spinlock_t **ptlp)
 {
+	bool is_migration;
 	pte_t ptent;
 
 	if (pvmw->flags & PVMW_SYNC) {
@@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 		return !!pvmw->pte;
 	}
 
+	is_migration = pvmw->flags & PVMW_MIGRATION;
 again:
 	/*
 	 * It is important to return the ptl corresponding to pte,
@@ -41,11 +43,14 @@ again:
 
 	ptent = ptep_get(pvmw->pte);
 
-	if (pvmw->flags & PVMW_MIGRATION) {
-		if (!is_swap_pte(ptent))
+	if (pte_none(ptent)) {
+		return false;
+	} else if (pte_present(ptent)) {
+		if (is_migration)
 			return false;
-	} else if (is_swap_pte(ptent)) {
+	} else if (!is_migration) {
 		swp_entry_t entry;
+
 		/*
 		 * Handle un-addressable ZONE_DEVICE memory.
 		 *
@@ -66,8 +71,6 @@ again:
 		if (!is_device_private_entry(entry) &&
 		    !is_device_exclusive_entry(entry))
 			return false;
-	} else if (!pte_present(ptent)) {
-		return false;
 	}
 	spin_lock(*ptlp);
 	if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
@@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
 			return false;
 
 		pfn = softleaf_to_pfn(entry);
-	} else if (is_swap_pte(ptent)) {
-		swp_entry_t entry;
+	} else if (pte_present(ptent)) {
+		pfn = pte_pfn(ptent);
+	} else {
+		const softleaf_t entry = softleaf_from_pte(ptent);
 
 		/* Handle un-addressable ZONE_DEVICE memory */
-		entry = pte_to_swp_entry(ptent);
-		if (!is_device_private_entry(entry) &&
-		    !is_device_exclusive_entry(entry))
-			return false;
-
-		pfn = swp_offset_pfn(entry);
-	} else {
-		if (!pte_present(ptent))
+		if (!softleaf_is_device_private(entry) &&
+		    !softleaf_is_device_exclusive(entry))
 			return false;
 
-		pfn = pte_pfn(ptent);
+		pfn = softleaf_to_pfn(entry);
 	}
 
 	if ((pfn + pte_nr - 1) < pvmw->pfn)
