path: root/fs/hugetlbfs
author:    Lorenzo Stoakes <lorenzo.stoakes@oracle.com>  2025-10-20 13:11:29 +0100
committer: Andrew Morton <akpm@linux-foundation.org>     2025-11-16 17:28:13 -0800
commit:    ea52cb24cd3fb121283754ab82b2cb3044609359 (patch)
tree:      7e41c9ce16637481ea3fd08220821966281b059e /fs/hugetlbfs
parent:    da003453dce728857bea2e3de74132a90c9c78e7 (diff)
mm/hugetlbfs: update hugetlbfs to use mmap_prepare
Since we can now perform actions after the VMA is established via mmap_prepare, use desc->action.success_hook to set up the hugetlb lock once the VMA is set up. We also make changes throughout hugetlbfs to make this possible.

Note that we must hide newly established hugetlb VMAs from the rmap until the operation is entirely complete, as we establish a hugetlb lock during VMA setup that can be raced by rmap users.

Link: https://lkml.kernel.org/r/b1afa16d3cfa585a03df9ae215ae9f905b3f0ed7.1760959442.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
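For context, a minimal sketch of the pattern this commit adopts: an .mmap_prepare hook configures the not-yet-created VMA through the descriptor and registers a success hook for work that needs the fully established VMA. The example_* names are hypothetical and only illustrative; the struct vm_area_desc fields (vm_flags, vm_ops, action.success_hook, action.hide_from_rmap_until_complete) are those used in the diff below.

    /* Hypothetical minimal .mmap_prepare implementation (example_* names
     * are illustrative, not part of this patch). */
    static const struct vm_operations_struct example_vm_ops;

    static int example_mmap_prepare_success(const struct vm_area_struct *vma)
    {
        /* Runs only once the VMA has been inserted into the VMA tree. */
        return 0;
    }

    static int example_mmap_prepare(struct vm_area_desc *desc)
    {
        /* Configure the VMA via the descriptor, not a live VMA. */
        desc->vm_flags |= VM_DONTEXPAND;
        desc->vm_ops = &example_vm_ops;

        /* Defer work that requires the fully established VMA. */
        desc->action.success_hook = example_mmap_prepare_success;

        /* Keep rmap walkers away until the success hook has run. */
        desc->action.hide_from_rmap_until_complete = true;
        return 0;
    }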
Diffstat (limited to 'fs/hugetlbfs')
-rw-r--r--  fs/hugetlbfs/inode.c  46
1 file changed, 33 insertions(+), 13 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ce8e40d35032..3919fca56553 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -96,8 +96,15 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
#define PGOFF_LOFFT_MAX \
(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
-static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int hugetlb_file_mmap_prepare_success(const struct vm_area_struct *vma)
{
+ /* Unfortunately we have to reassign vma->vm_private_data. */
+ return hugetlb_vma_lock_alloc((struct vm_area_struct *)vma);
+}
+
+static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
+{
+ struct file *file = desc->file;
struct inode *inode = file_inode(file);
loff_t len, vma_len;
int ret;
@@ -112,8 +119,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
- vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
- vma->vm_ops = &hugetlb_vm_ops;
+ desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+ desc->vm_ops = &hugetlb_vm_ops;
/*
* page based offset in vm_pgoff could be sufficiently large to
@@ -122,16 +129,16 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* sizeof(unsigned long). So, only check in those instances.
*/
if (sizeof(unsigned long) == sizeof(loff_t)) {
- if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ if (desc->pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
}
/* must be huge page aligned */
- if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+ if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
- vma_len = (loff_t)(vma->vm_end - vma->vm_start);
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ vma_len = (loff_t)vma_desc_size(desc);
+ len = vma_len + ((loff_t)desc->pgoff << PAGE_SHIFT);
/* check for overflow */
if (len < vma_len)
return -EINVAL;
@@ -141,7 +148,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = -ENOMEM;
- vm_flags = vma->vm_flags;
+ vm_flags = desc->vm_flags;
/*
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
* reserving here. Note: only for SHM hugetlbfs file, the inode
@@ -151,17 +158,30 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags |= VM_NORESERVE;
if (hugetlb_reserve_pages(inode,
- vma->vm_pgoff >> huge_page_order(h),
- len >> huge_page_shift(h), vma,
- vm_flags) < 0)
+ desc->pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), desc,
+ vm_flags) < 0)
goto out;
ret = 0;
- if (vma->vm_flags & VM_WRITE && inode->i_size < len)
+ if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
i_size_write(inode, len);
out:
inode_unlock(inode);
+ if (!ret) {
+ /* Allocate the VMA lock after we set it up. */
+ desc->action.success_hook = hugetlb_file_mmap_prepare_success;
+ /*
+ * We cannot permit rmap to find this VMA in the window
+ * between the VMA being inserted into the VMA tree and the
+ * completion/success hook being invoked.
+ *
+ * This is because we establish a per-VMA hugetlb lock which can
+ * be raced by rmap.
+ */
+ desc->action.hide_from_rmap_until_complete = true;
+ }
return ret;
}
@@ -1220,7 +1240,7 @@ static void init_once(void *foo)
static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
- .mmap = hugetlbfs_file_mmap,
+ .mmap_prepare = hugetlbfs_file_mmap_prepare,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
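A simplified sketch of the core-mm ordering these hooks imply follows; the function and the insert_vma_from_desc() helper are hypothetical stand-ins for the real mmap() internals, shown only to make the success-hook/rmap-hiding sequence concrete.

    /* Hypothetical sketch of the sequence enabled by this patch; not the
     * actual mmap() code path. */
    static int sketch_mmap_with_prepare(struct vm_area_desc *desc)
    {
        struct vm_area_struct *vma;
        int err;

        /* 1. The filesystem fills in desc and registers its hooks. */
        err = desc->file->f_op->mmap_prepare(desc);
        if (err)
            return err;

        /* 2. The VMA is created and inserted into the VMA tree; with
         *    hide_from_rmap_until_complete set it stays invisible to
         *    rmap for now. (Hypothetical helper.) */
        vma = insert_vma_from_desc(desc);
        if (IS_ERR(vma))
            return PTR_ERR(vma);

        /* 3. The success hook runs: hugetlbfs allocates the per-VMA
         *    lock here. */
        if (desc->action.success_hook) {
            err = desc->action.success_hook(vma);
            if (err)
                return err; /* real code would also unwind the VMA */
        }

        /* 4. Only now is the VMA exposed to rmap walkers. */
        return 0;
    }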