diff options
| author | SeongJae Park <sj@kernel.org> | 2025-04-09 17:00:20 -0700 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-05-11 17:48:27 -0700 |
| commit | 01bef02bf9301ea6c255b0daa38356e07719dd69 (patch) | |
| tree | a86be65236a02978ca3477d73e497c3678f4a176 /mm/madvise.c | |
| parent | 066c770437835d2bd2072bd2c88a71fcbbd5ccb3 (diff) | |
mm/madvise: batch tlb flushes for MADV_FREE
MADV_FREE handling for [process_]madvise() flushes tlb for each vma of
each address range. Update the logic to do tlb flushes in a batched way.
Initialize an mmu_gather object from do_madvise() and vector_madvise(),
which are the entry level functions for [process_]madvise(), respectively.
And pass those objects to the function for per-vma work, via
madvise_behavior struct. Make the per-vma logic not flushes tlb on their
own but just saves the tlb entries to the received mmu_gather object.
Finally, the entry level functions flush the tlb entries that gathered for
the entire user request, at once.
Link: https://lkml.kernel.org/r/20250410000022.1901-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/madvise.c')
| -rw-r--r-- | mm/madvise.c | 57 |
1 files changed, 46 insertions, 11 deletions
diff --git a/mm/madvise.c b/mm/madvise.c index 26fa868b41af..951038a9f36f 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -799,12 +799,13 @@ static const struct mm_walk_ops madvise_free_walk_ops = { .walk_lock = PGWALK_RDLOCK, }; -static int madvise_free_single_vma(struct vm_area_struct *vma, +static int madvise_free_single_vma(struct madvise_behavior *madv_behavior, + struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; - struct mmu_gather tlb; + struct mmu_gather *tlb = madv_behavior->tlb; /* MADV_FREE works for only anon vma at the moment */ if (!vma_is_anonymous(vma)) @@ -820,17 +821,14 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, range.start, range.end); lru_add_drain(); - tlb_gather_mmu(&tlb, mm); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(&range); - tlb_start_vma(&tlb, vma); + tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, range.start, range.end, - &madvise_free_walk_ops, &tlb); - tlb_end_vma(&tlb, vma); + &madvise_free_walk_ops, tlb); + tlb_end_vma(tlb, vma); mmu_notifier_invalidate_range_end(&range); - tlb_finish_mmu(&tlb); - return 0; } @@ -954,7 +952,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED) return madvise_dontneed_single_vma(vma, start, end); else if (behavior == MADV_FREE) - return madvise_free_single_vma(vma, start, end); + return madvise_free_single_vma(madv_behavior, vma, start, end); else return -EINVAL; } @@ -1627,6 +1625,29 @@ static void madvise_unlock(struct mm_struct *mm, int behavior) mmap_read_unlock(mm); } +static bool madvise_batch_tlb_flush(int behavior) +{ + switch (behavior) { + case MADV_FREE: + return true; + default: + return false; + } +} + +static void madvise_init_tlb(struct madvise_behavior *madv_behavior, + struct mm_struct *mm) +{ + if (madvise_batch_tlb_flush(madv_behavior->behavior)) + tlb_gather_mmu(madv_behavior->tlb, mm); +} + +static void madvise_finish_tlb(struct madvise_behavior *madv_behavior) +{ + if (madvise_batch_tlb_flush(madv_behavior->behavior)) + tlb_finish_mmu(madv_behavior->tlb); +} + static bool is_valid_madvise(unsigned long start, size_t len_in, int behavior) { size_t len; @@ -1783,14 +1804,20 @@ static int madvise_do_behavior(struct mm_struct *mm, int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { int error; - struct madvise_behavior madv_behavior = {.behavior = behavior}; + struct mmu_gather tlb; + struct madvise_behavior madv_behavior = { + .behavior = behavior, + .tlb = &tlb, + }; if (madvise_should_skip(start, len_in, behavior, &error)) return error; error = madvise_lock(mm, behavior); if (error) return error; + madvise_init_tlb(&madv_behavior, mm); error = madvise_do_behavior(mm, start, len_in, &madv_behavior); + madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); return error; @@ -1807,13 +1834,18 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, { ssize_t ret = 0; size_t total_len; - struct madvise_behavior madv_behavior = {.behavior = behavior}; + struct mmu_gather tlb; + struct madvise_behavior madv_behavior = { + .behavior = behavior, + .tlb = &tlb, + }; total_len = iov_iter_count(iter); ret = madvise_lock(mm, behavior); if (ret) return ret; + madvise_init_tlb(&madv_behavior, mm); while (iov_iter_count(iter)) { unsigned long start = (unsigned long)iter_iov_addr(iter); @@ -1842,14 +1874,17 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, } /* Drop and reacquire lock to unwind race. */ + madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); madvise_lock(mm, behavior); + madvise_init_tlb(&madv_behavior, mm); continue; } if (ret < 0) break; iov_iter_advance(iter, iter_iov_len(iter)); } + madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); ret = (total_len - iov_iter_count(iter)) ? : ret; |
