summaryrefslogtreecommitdiff
path: root/fs/nfsd
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 15:34:41 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 15:34:41 -0800
commitdb74a7d02ae244ec0552d18f51054f9ae0d921ad (patch)
tree5f5de7b84f050774b6ae8593e2535be03c9aeaa6 /fs/nfsd
parent4664fb427c8fd0080f40109f5e2b2090a6fb0c84 (diff)
parent4be9e04ebf75a5c4478c1c6295e2122e5dc98f5f (diff)
Merge tag 'vfs-6.19-rc1.directory.delegations' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull directory delegations update from Christian Brauner: "This contains the work for recall-only directory delegations for knfsd. Add support for simple, recallable-only directory delegations. This was decided at the fall NFS Bakeathon where the NFS client and server maintainers discussed how to merge directory delegation support. The approach starts with recallable-only delegations for several reasons: 1. RFC8881 has gaps that are being addressed in RFC8881bis. In particular, it requires directory position information for CB_NOTIFY callbacks, which is difficult to implement properly under Linux. The spec is being extended to allow that information to be omitted. 2. Client-side support for CB_NOTIFY still lags. The client side involves heuristics about when to request a delegation. 3. Early indication shows simple, recallable-only delegations can help performance. Anna Schumaker mentioned seeing a multi-minute speedup in xfstests runs with them enabled. With these changes, userspace can also request a read lease on a directory that will be recalled on conflicting accesses. This may be useful for applications like Samba. Users can disable leases altogether via the fs.leases-enable sysctl if needed. VFS changes: - Dedicated Type for Delegations Introduce struct delegated_inode to track inodes that may have delegations that need to be broken. This replaces the previous approach of passing raw inode pointers through the delegation breaking code paths, providing better type safety and clearer semantics for the delegation machinery. - Break parent directory delegations in open(..., O_CREAT) codepath - Allow mkdir to wait for delegation break on parent - Allow rmdir to wait for delegation break on parent - Add try_break_deleg calls for parents to vfs_link(), vfs_rename(), and vfs_unlink() - Make vfs_create(), vfs_mknod(), and vfs_symlink() break delegations on parent directory - Clean up argument list for vfs_create() - Expose delegation support to userland Filelock changes: - Make lease_alloc() take a flags argument - Rework the __break_lease API to use flags - Add struct delegated_inode - Push the S_ISREG check down to ->setlease handlers - Lift the ban on directory leases in generic_setlease NFSD changes: - Allow filecache to hold S_IFDIR files - Allow DELEGRETURN on directories - Wire up GET_DIR_DELEGATION handling Fixes: - Fix kernel-doc warnings in __fcntl_getlease - Add needed headers for new struct delegation definition" * tag 'vfs-6.19-rc1.directory.delegations' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: vfs: add needed headers for new struct delegation definition filelock: __fcntl_getlease: fix kernel-doc warnings vfs: expose delegation support to userland nfsd: wire up GET_DIR_DELEGATION handling nfsd: allow DELEGRETURN on directories nfsd: allow filecache to hold S_IFDIR files filelock: lift the ban on directory leases in generic_setlease vfs: make vfs_symlink break delegations on parent dir vfs: make vfs_mknod break delegations on parent directory vfs: make vfs_create break delegations on parent directory vfs: clean up argument list for vfs_create() vfs: break parent dir delegations in open(..., O_CREAT) codepath vfs: allow rmdir to wait for delegation break on parent vfs: allow mkdir to wait for delegation break on parent vfs: add try_break_deleg calls for parents to vfs_{link,rename,unlink} filelock: push the S_ISREG check down to ->setlease handlers filelock: add struct delegated_inode filelock: rework the __break_lease API to use flags filelock: make lease_alloc() take a flags argument
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/filecache.c57
-rw-r--r--fs/nfsd/filecache.h2
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4proc.c22
-rw-r--r--fs/nfsd/nfs4recover.c6
-rw-r--r--fs/nfsd/nfs4state.c103
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c16
-rw-r--r--fs/nfsd/vfs.h2
9 files changed, 186 insertions, 29 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index a238b6725008..93798575b807 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -1086,7 +1086,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
struct auth_domain *client,
struct svc_fh *fhp,
unsigned int may_flags, struct file *file,
- struct nfsd_file **pnf, bool want_gc)
+ umode_t type, bool want_gc, struct nfsd_file **pnf)
{
unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
struct nfsd_file *new, *nf;
@@ -1097,13 +1097,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
int ret;
retry:
- if (rqstp) {
- status = fh_verify(rqstp, fhp, S_IFREG,
+ if (rqstp)
+ status = fh_verify(rqstp, fhp, type,
may_flags|NFSD_MAY_OWNER_OVERRIDE);
- } else {
- status = fh_verify_local(net, cred, client, fhp, S_IFREG,
+ else
+ status = fh_verify_local(net, cred, client, fhp, type,
may_flags|NFSD_MAY_OWNER_OVERRIDE);
- }
+
if (status != nfs_ok)
return status;
inode = d_inode(fhp->fh_dentry);
@@ -1176,15 +1176,18 @@ out:
open_file:
trace_nfsd_file_alloc(nf);
- nf->nf_mark = nfsd_file_mark_find_or_create(inode);
- if (nf->nf_mark) {
+
+ if (type == S_IFREG)
+ nf->nf_mark = nfsd_file_mark_find_or_create(inode);
+
+ if (type != S_IFREG || nf->nf_mark) {
if (file) {
get_file(file);
nf->nf_file = file;
status = nfs_ok;
trace_nfsd_file_opened(nf, status);
} else {
- ret = nfsd_open_verified(fhp, may_flags, &nf->nf_file);
+ ret = nfsd_open_verified(fhp, type, may_flags, &nf->nf_file);
if (ret == -EOPENSTALE && stale_retry) {
stale_retry = false;
nfsd_file_unhash(nf);
@@ -1246,7 +1249,7 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
- fhp, may_flags, NULL, pnf, true);
+ fhp, may_flags, NULL, S_IFREG, true, pnf);
}
/**
@@ -1271,7 +1274,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
- fhp, may_flags, NULL, pnf, false);
+ fhp, may_flags, NULL, S_IFREG, false, pnf);
}
/**
@@ -1314,8 +1317,8 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
const struct cred *save_cred = get_current_cred();
__be32 beres;
- beres = nfsd_file_do_acquire(NULL, net, cred, client,
- fhp, may_flags, NULL, pnf, false);
+ beres = nfsd_file_do_acquire(NULL, net, cred, client, fhp, may_flags,
+ NULL, S_IFREG, false, pnf);
put_cred(revert_creds(save_cred));
return beres;
}
@@ -1344,7 +1347,33 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_file **pnf)
{
return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
- fhp, may_flags, file, pnf, false);
+ fhp, may_flags, file, S_IFREG, false, pnf);
+}
+
+/**
+ * nfsd_file_acquire_dir - Get a struct nfsd_file with an open directory
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file to be opened
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * The nfsd_file_object returned by this API is reference-counted
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put(). This opens directories only, and only
+ * in O_RDONLY mode.
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file **pnf)
+{
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, fhp,
+ NFSD_MAY_READ|NFSD_MAY_64BIT_COOKIE,
+ NULL, S_IFDIR, false, pnf);
}
/*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index e3d6ca2b6030..b383dbc5b921 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -82,5 +82,7 @@ __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
struct auth_domain *client, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf);
+__be32 nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file **pnf);
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b6d03e1ef5f7..2cb972b5ed99 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -344,7 +344,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
status = fh_fill_pre_attrs(fhp);
if (status != nfs_ok)
goto out;
- host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, child, iap->ia_mode, NULL);
if (host_err < 0) {
status = nfserrno(host_err);
goto out;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7f7e6bb23a90..5cc78f4a81b8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2342,6 +2342,13 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
union nfsd4_op_u *u)
{
struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ struct nfs4_delegation *dd;
+ struct nfsd_file *nf;
+ __be32 status;
+
+ status = nfsd_file_acquire_dir(rqstp, &cstate->current_fh, &nf);
+ if (status != nfs_ok)
+ return status;
/*
* RFC 8881, section 18.39.3 says:
@@ -2355,7 +2362,20 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
* return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this
* situation.
*/
- gdd->gddrnf_status = GDD4_UNAVAIL;
+ dd = nfsd_get_dir_deleg(cstate, gdd, nf);
+ nfsd_file_put(nf);
+ if (IS_ERR(dd)) {
+ int err = PTR_ERR(dd);
+
+ if (err != -EAGAIN)
+ return nfserrno(err);
+ gdd->gddrnf_status = GDD4_UNAVAIL;
+ return nfs_ok;
+ }
+
+ gdd->gddrnf_status = GDD4_OK;
+ memcpy(&gdd->gddr_stateid, &dd->dl_stid.sc_stateid, sizeof(gdd->gddr_stateid));
+ nfs4_put_stid(&dd->dl_stid);
return nfs_ok;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e2b9472e5c78..30bae93931d9 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+ dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, 0700, NULL);
if (IS_ERR(dentry))
status = PTR_ERR(dentry);
out_put:
@@ -337,7 +337,7 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
status = -ENOENT;
if (d_really_is_negative(dentry))
goto out;
- status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry, NULL);
out:
dput(dentry);
out_unlock:
@@ -427,7 +427,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
if (nfs4_has_reclaimed_state(name, nn))
goto out_free;
- status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child);
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL);
if (status)
printk("failed to remove client recovery directory %pd\n",
child);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8a6960500217..6791fc239dbd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7859,7 +7859,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
+ status = fh_verify(rqstp, &cstate->current_fh, 0, 0);
+ if (status)
return status;
status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn);
@@ -9377,3 +9378,103 @@ out_status:
nfs4_put_stid(&dp->dl_stid);
return status;
}
+
+/**
+ * nfsd_get_dir_deleg - attempt to get a directory delegation
+ * @cstate: compound state
+ * @gdd: GET_DIR_DELEGATION arg/resp structure
+ * @nf: nfsd_file opened on the directory
+ *
+ * Given a GET_DIR_DELEGATION request @gdd, attempt to acquire a delegation
+ * on the directory to which @nf refers. Note that this does not set up any
+ * sort of async notifications for the delegation.
+ */
+struct nfs4_delegation *
+nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
+ struct nfsd4_get_dir_delegation *gdd,
+ struct nfsd_file *nf)
+{
+ struct nfs4_client *clp = cstate->clp;
+ struct nfs4_delegation *dp;
+ struct file_lease *fl;
+ struct nfs4_file *fp, *rfp;
+ int status = 0;
+
+ fp = nfsd4_alloc_file();
+ if (!fp)
+ return ERR_PTR(-ENOMEM);
+
+ nfsd4_file_init(&cstate->current_fh, fp);
+
+ rfp = nfsd4_file_hash_insert(fp, &cstate->current_fh);
+ if (unlikely(!rfp)) {
+ put_nfs4_file(fp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (rfp != fp) {
+ put_nfs4_file(fp);
+ fp = rfp;
+ }
+
+ /* if this client already has one, return that it's unavailable */
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ /* existing delegation? */
+ if (nfs4_delegation_exists(clp, fp)) {
+ status = -EAGAIN;
+ } else if (!fp->fi_deleg_file) {
+ fp->fi_deleg_file = nfsd_file_get(nf);
+ fp->fi_delegees = 1;
+ } else {
+ ++fp->fi_delegees;
+ }
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+ if (status) {
+ put_nfs4_file(fp);
+ return ERR_PTR(status);
+ }
+
+ /* Try to set up the lease */
+ status = -ENOMEM;
+ dp = alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ);
+ if (!dp)
+ goto out_delegees;
+
+ fl = nfs4_alloc_init_lease(dp);
+ if (!fl)
+ goto out_put_stid;
+
+ status = kernel_setlease(nf->nf_file,
+ fl->c.flc_type, &fl, NULL);
+ if (fl)
+ locks_free_lease(fl);
+ if (status)
+ goto out_put_stid;
+
+ /*
+ * Now, try to hash it. This can fail if we race another nfsd task
+ * trying to set a delegation on the same file. If that happens,
+ * then just say UNAVAIL.
+ */
+ spin_lock(&state_lock);
+ spin_lock(&clp->cl_lock);
+ spin_lock(&fp->fi_lock);
+ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&clp->cl_lock);
+ spin_unlock(&state_lock);
+
+ if (!status)
+ return dp;
+
+ /* Something failed. Drop the lease and clean up the stid */
+ kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
+out_put_stid:
+ nfs4_put_stid(&dp->dl_stid);
+out_delegees:
+ put_deleg_file(fp);
+ return ERR_PTR(status);
+}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 1e736f402426..b052c1effdc5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -867,4 +867,9 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
struct dentry *dentry, struct nfs4_delegation **pdp);
+
+struct nfsd4_get_dir_delegation;
+struct nfs4_delegation *nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
+ struct nfsd4_get_dir_delegation *gdd,
+ struct nfsd_file *nf);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cf4062ac092a..6642620b6e9e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -959,15 +959,16 @@ retry:
/**
* nfsd_open_verified - Open a regular file for the filecache
* @fhp: NFS filehandle of the file to open
+ * @type: S_IFMT inode type allowed (0 means any type is allowed)
* @may_flags: internal permission flags
* @filp: OUT: open "struct file *"
*
* Returns zero on success, or a negative errno value.
*/
int
-nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp)
+nfsd_open_verified(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp)
{
- return __nfsd_open(fhp, S_IFREG, may_flags, filp);
+ return __nfsd_open(fhp, type, may_flags, filp);
}
/*
@@ -1552,13 +1553,12 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = 0;
switch (type) {
case S_IFREG:
- host_err = vfs_create(&nop_mnt_idmap, dirp, dchild,
- iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, dchild, iap->ia_mode, NULL);
if (!host_err)
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
+ dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, NULL);
if (IS_ERR(dchild)) {
host_err = PTR_ERR(dchild);
} else if (d_is_negative(dchild)) {
@@ -1574,7 +1574,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFIFO:
case S_IFSOCK:
host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild,
- iap->ia_mode, rdev);
+ iap->ia_mode, rdev, NULL);
break;
default:
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
@@ -1743,7 +1743,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = fh_fill_pre_attrs(fhp);
if (err != nfs_ok)
goto out_unlock;
- host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
+ host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path, NULL);
err = nfserrno(host_err);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
if (!err)
@@ -2108,7 +2108,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
break;
}
} else {
- host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry);
+ host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry, NULL);
}
fh_fill_post_attrs(fhp);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 0c0292611c6d..09de48c50cbe 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -114,7 +114,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-int nfsd_open_verified(struct svc_fh *fhp, int may_flags,
+int nfsd_open_verified(struct svc_fh *fhp, umode_t type, int may_flags,
struct file **filp);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,