-rw-r--r--  .clang-format  1
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c  2
-rw-r--r--  arch/s390/include/uapi/asm/ipl.h  1
-rw-r--r--  arch/s390/kernel/ipl.c  48
-rw-r--r--  arch/s390/kernel/stacktrace.c  18
-rw-r--r--  arch/s390/pci/pci.c  7
-rw-r--r--  arch/s390/pci/pci_bus.c  98
-rw-r--r--  arch/s390/pci/pci_bus.h  15
-rw-r--r--  arch/x86/kernel/unwind_orc.c  39
-rw-r--r--  arch/x86/net/bpf_jit_comp.c  12
-rw-r--r--  fs/btrfs/file.c  3
-rw-r--r--  fs/btrfs/inode.c  1
-rw-r--r--  fs/btrfs/qgroup.c  27
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c  1
-rw-r--r--  fs/btrfs/tree-log.c  46
-rw-r--r--  fs/btrfs/volumes.c  1
-rw-r--r--  fs/file_attr.c  2
-rw-r--r--  fs/libfs.c  50
-rw-r--r--  fs/notify/fsnotify.c  9
-rw-r--r--  fs/smb/common/smbdirect/smbdirect_socket.h  12
-rw-r--r--  fs/smb/server/mgmt/user_session.c  4
-rw-r--r--  fs/smb/server/smb2pdu.c  4
-rw-r--r--  fs/smb/server/smbacl.c  3
-rw-r--r--  fs/smb/server/transport_rdma.c  175
-rw-r--r--  fs/smb/server/vfs.c  2
-rw-r--r--  include/linux/bpf.h  3
-rw-r--r--  include/linux/fs.h  2
-rw-r--r--  kernel/bpf/core.c  16
-rw-r--r--  kernel/bpf/dmabuf_iter.c  56
-rw-r--r--  kernel/cgroup/rstat.c  13
-rw-r--r--  kernel/sched/ext.c  72
-rw-r--r--  kernel/trace/bpf_trace.c  2
-rw-r--r--  mm/shmem.c  32
-rw-r--r--  net/smc/Kconfig  4
-rw-r--r--  tools/bpf/bpftool/Makefile  2
-rw-r--r--  tools/lib/bpf/libbpf.c  7
-rw-r--r--  tools/testing/selftests/bpf/Makefile  2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/d_path.c  89
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c  47
-rw-r--r--  tools/testing/selftests/bpf/progs/test_d_path.c  23
-rw-r--r--  tools/testing/selftests/sched_ext/runner.c  8
41 files changed, 707 insertions(+), 252 deletions(-)
diff --git a/.clang-format b/.clang-format
index 2ceca764122f..c7060124a47a 100644
--- a/.clang-format
+++ b/.clang-format
@@ -748,6 +748,7 @@ ForEachMacros:
- 'ynl_attr_for_each_nested'
- 'ynl_attr_for_each_payload'
- 'zorro_for_each_dev'
+ - 'zpci_bus_for_each'
IncludeBlocks: Preserve
IncludeCategories:
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 74dd29816f36..b6eb7a465ad2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
return;
- if (capable(CAP_SYS_ADMIN))
+ if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
return;
if (supports_clearbhb(SCOPE_SYSTEM)) {
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 2cd28af50dd4..3d64a2251699 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -15,6 +15,7 @@ struct ipl_pl_hdr {
#define IPL_PL_FLAG_IPLPS 0x80
#define IPL_PL_FLAG_SIPL 0x40
#define IPL_PL_FLAG_IPLSR 0x20
+#define IPL_PL_FLAG_SBP 0x10
/* IPL Parameter Block header */
struct ipl_pb_hdr {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 961a3d60a4dd..dcdc7e274848 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
+#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value) \
+ IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long value; \
+ if (sscanf(buf, _fmt_in, &value) != 1) \
+ return -EINVAL; \
+ (_value) = value; \
+ (_hdr).flags &= ~IPL_PL_FLAG_SBP; \
+ return len; \
+} \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, 0644, \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store)
+
#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \
static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
@@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
reipl_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
reipl_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
- reipl_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
reipl_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
reipl_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+ reipl_block_fcp->hdr,
+ reipl_block_fcp->fcp.bootprog);
static void reipl_get_ascii_loadparm(char *loadparm,
struct ipl_parameter_block *ibp)
@@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
reipl_block_nvme->nvme.fid);
DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
reipl_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
- reipl_block_nvme->nvme.bootprog);
DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
reipl_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+ reipl_block_nvme->hdr,
+ reipl_block_nvme->nvme.bootprog);
static struct attribute *reipl_nvme_attrs[] = {
&sys_reipl_nvme_fid_attr.attr,
@@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
};
DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
- reipl_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+ reipl_block_eckd->hdr,
+ reipl_block_eckd->eckd.bootprog);
static struct attribute *reipl_eckd_attrs[] = {
&sys_reipl_eckd_device_attr.attr,
@@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
dump_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
dump_block_fcp->fcp.lun);
-DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
- dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
dump_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
dump_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+ dump_block_fcp->hdr,
+ dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
dump_block_fcp->fcp,
@@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
dump_block_nvme->nvme.fid);
DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
dump_block_nvme->nvme.nsid);
-DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
- dump_block_nvme->nvme.bootprog);
DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
dump_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+ dump_block_nvme->hdr,
+ dump_block_nvme->nvme.bootprog);
DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
dump_block_nvme->nvme,
@@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = {
/* ECKD dump device attributes */
DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
-DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
- dump_block_eckd->eckd.bootprog);
+DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+ dump_block_eckd->hdr,
+ dump_block_eckd->eckd.bootprog);
IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
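
For one concrete instantiation, DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, ...) above expands to roughly the following store handler (a hand-expanded sketch of the macro, not literal preprocessor output):

static ssize_t sys_reipl_fcp_bootprog_store(struct kobject *kobj,
					    struct kobj_attribute *attr,
					    const char *buf, size_t len)
{
	unsigned long long value;

	if (sscanf(buf, "%lld\n", &value) != 1)
		return -EINVAL;
	reipl_block_fcp->fcp.bootprog = value;
	/* selecting a boot program clears the secure-boot flag */
	reipl_block_fcp->hdr.flags &= ~IPL_PL_FLAG_SBP;
	return len;
}
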
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 3aae7f70e6ab..18520d333058 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
struct stack_frame_vdso_wrapper __user *sf_vdso;
struct stack_frame_user __user *sf;
unsigned long ip, sp;
- bool first = true;
if (!current->mm)
return;
@@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
if (__get_user(ip, &sf->gprs[8]))
break;
}
- /* Sanity check: ABI requires SP to be 8 byte aligned. */
- if (sp & 0x7)
+ /* Validate SP and RA (ABI requires SP to be 8 byte aligned). */
+ if (sp & 0x7 || ip_invalid(ip))
break;
- if (ip_invalid(ip)) {
- /*
- * If the instruction address is invalid, and this
- * is the first stack frame, assume r14 has not
- * been written to the stack yet. Otherwise exit.
- */
- if (!first)
- break;
- ip = regs->gprs[14];
- if (ip_invalid(ip))
- break;
- }
if (!store_ip(consume_entry, cookie, entry, perf, ip))
break;
- first = false;
}
pagefault_enable();
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 5a6ace9d875a..57f3980b98a9 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -961,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev)
}
void zpci_release_device(struct kref *kref)
+ __releases(&zpci_list_lock)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
@@ -1148,6 +1149,7 @@ static void zpci_add_devices(struct list_head *scan_list)
int zpci_scan_devices(void)
{
+ struct zpci_bus *zbus;
LIST_HEAD(scan_list);
int rc;
@@ -1156,7 +1158,10 @@ int zpci_scan_devices(void)
return rc;
zpci_add_devices(&scan_list);
- zpci_bus_scan_busses();
+ zpci_bus_for_each(zbus) {
+ zpci_bus_scan_bus(zbus);
+ cond_resched();
+ }
return 0;
}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 66c4bd888b29..42a13e451f64 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
return ret;
}
-/* zpci_bus_scan_busses - Scan all registered busses
- *
- * Scan all available zbusses
- *
- */
-void zpci_bus_scan_busses(void)
-{
- struct zpci_bus *zbus = NULL;
-
- mutex_lock(&zbus_list_lock);
- list_for_each_entry(zbus, &zbus_list, bus_next) {
- zpci_bus_scan_bus(zbus);
- cond_resched();
- }
- mutex_unlock(&zbus_list_lock);
-}
-
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
{
return !s390_pci_no_rid && zdev->rid_available &&
@@ -222,10 +205,29 @@ out_free_domain:
return -ENOMEM;
}
-static void zpci_bus_release(struct kref *kref)
+/**
+ * zpci_bus_release - Un-initialize resources associated with the zbus and
+ * free memory
+ * @kref: pointer to the kref embedded in struct zpci_bus
+ *
+ * MUST be called with `zbus_list_lock` held; the lock is released before
+ * the function returns.
+ */
+static inline void zpci_bus_release(struct kref *kref)
+ __releases(&zbus_list_lock)
{
struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
+ lockdep_assert_held(&zbus_list_lock);
+
+ list_del(&zbus->bus_next);
+ mutex_unlock(&zbus_list_lock);
+
+ /*
+ * At this point no-one should see this object, or be able to get a new
+ * reference to it.
+ */
+
if (zbus->bus) {
pci_lock_rescan_remove();
pci_stop_root_bus(zbus->bus);
@@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
pci_unlock_rescan_remove();
}
- mutex_lock(&zbus_list_lock);
- list_del(&zbus->bus_next);
- mutex_unlock(&zbus_list_lock);
zpci_remove_parent_msi_domain(zbus);
kfree(zbus);
}
-static void zpci_bus_put(struct zpci_bus *zbus)
+static inline void __zpci_bus_get(struct zpci_bus *zbus)
+{
+ lockdep_assert_held(&zbus_list_lock);
+ kref_get(&zbus->kref);
+}
+
+static inline void zpci_bus_put(struct zpci_bus *zbus)
{
- kref_put(&zbus->kref, zpci_bus_release);
+ kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
}
static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
@@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
if (!zbus->multifunction)
continue;
if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
- kref_get(&zbus->kref);
+ __zpci_bus_get(zbus);
goto out_unlock;
}
}
@@ -268,6 +273,44 @@ out_unlock:
return zbus;
}
+/**
+ * zpci_bus_get_next - get the next zbus object from given position in the list
+ * @pos: current position/cursor in the global zbus list
+ *
+ * Acquires and releases references as the cursor iterates (might also free/
+ * release the cursor). It is tolerant of concurrent operations on the list.
+ *
+ * To begin the iteration, set *@pos to %NULL before calling the function.
+ *
+ * *@pos is set to %NULL in cases where either the list is empty, or *@pos is
+ * the last element in the list.
+ *
+ * Context: Process context. May sleep.
+ */
+void zpci_bus_get_next(struct zpci_bus **pos)
+{
+ struct zpci_bus *curp = *pos, *next = NULL;
+
+ mutex_lock(&zbus_list_lock);
+ if (curp)
+ next = list_next_entry(curp, bus_next);
+ else
+ next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
+
+ if (list_entry_is_head(next, &zbus_list, bus_next))
+ next = NULL;
+
+ if (next)
+ __zpci_bus_get(next);
+
+ *pos = next;
+ mutex_unlock(&zbus_list_lock);
+
+ /* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
+ if (curp)
+ zpci_bus_put(curp);
+}
+
static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
{
struct zpci_bus *zbus;
@@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
zbus->topo = topo;
zbus->topo_is_tid = topo_is_tid;
INIT_LIST_HEAD(&zbus->bus_next);
- mutex_lock(&zbus_list_lock);
- list_add_tail(&zbus->bus_next, &zbus_list);
- mutex_unlock(&zbus_list_lock);
kref_init(&zbus->kref);
INIT_LIST_HEAD(&zbus->resources);
@@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
zbus->bus_resource.flags = IORESOURCE_BUS;
pci_add_resource(&zbus->resources, &zbus->bus_resource);
+ mutex_lock(&zbus_list_lock);
+ list_add_tail(&zbus->bus_next, &zbus_list);
+ mutex_unlock(&zbus_list_lock);
+
return zbus;
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index ae3d7a9159bd..e440742e3145 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
void zpci_bus_device_unregister(struct zpci_dev *zdev);
int zpci_bus_scan_bus(struct zpci_bus *zbus);
-void zpci_bus_scan_busses(void);
+void zpci_bus_get_next(struct zpci_bus **pos);
+
+/**
+ * zpci_bus_for_each - iterate over all the registered zbus objects
+ * @pos: a struct zpci_bus * as cursor
+ *
+ * Acquires and releases references as the cursor iterates over the registered
+ * objects. It is tolerant of concurrent removals of objects.
+ *
+ * Context: Process context. May sleep.
+ */
+#define zpci_bus_for_each(pos) \
+ for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
+ zpci_bus_get_next(&(pos)))
int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
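
A minimal caller sketch for the new iterator (the same pattern as the zpci_scan_devices() hunk above; note that the cursor's reference is only dropped by the next zpci_bus_get_next() call, so this assumes the loop runs to completion):

struct zpci_bus *zbus;

zpci_bus_for_each(zbus) {
	/* zbus is pinned by a reference, not by zbus_list_lock */
	zpci_bus_scan_bus(zbus);
	cond_resched();		/* the loop body may sleep */
}
/* on normal termination zbus == NULL and no reference is held */
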
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 977ee75e047c..f610fde2d5c4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/bpf.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+ .type = ORC_TYPE_CALL,
+ .sp_reg = ORC_REG_BP,
+ .sp_offset = 16,
+ .bp_reg = ORC_REG_PREV_SP,
+ .bp_offset = -16,
+};
+
+static struct orc_entry *orc_bpf_find(unsigned long ip)
+{
+#ifdef CONFIG_BPF_JIT
+ if (bpf_has_frame_pointer(ip))
+ return &orc_fp_entry;
+#endif
+
+ return NULL;
+}
+
/*
* If we crash with IP==0, the last successfully executed instruction
* was probably an indirect function call with a NULL function pointer,
@@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
-/* Fake frame pointer entry -- used as a fallback for generated code */
-static struct orc_entry orc_fp_entry = {
- .type = ORC_TYPE_CALL,
- .sp_reg = ORC_REG_BP,
- .sp_offset = 16,
- .bp_reg = ORC_REG_PREV_SP,
- .bp_offset = -16,
-};
-
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
+ /* BPF lookup: */
+ orc = orc_bpf_find(ip);
+ if (orc)
+ return orc;
+
return orc_ftrace_find(ip);
}
@@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (!orc) {
/*
* As a fallback, try to assume this code uses a frame pointer.
- * This is useful for generated code, like BPF, which ORC
- * doesn't know about. This is just a guess, so the rest of
- * the unwind is no longer considered reliable.
+ * This is just a guess, so the rest of the unwind is no longer
+ * considered reliable.
*/
orc = &orc_fp_entry;
state->error = true;
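
For context, orc_fp_entry encodes the stack layout after the standard "push rbp; mov rbp, rsp" prologue; spelled out (my annotation, not part of the patch):

/*
 * sp_reg = ORC_REG_BP, sp_offset = 16       => prev_sp = rbp + 16
 * bp_reg = ORC_REG_PREV_SP, bp_offset = -16 => saved rbp at prev_sp - 16
 *
 *	[rbp + 8]  return address   (prev_sp - 8)
 *	[rbp + 0]  caller's rbp     (prev_sp - 16)
 *
 * orc_bpf_find() only hands this entry out for instruction addresses
 * where the BPF JIT has recorded that this layout actually holds.
 */
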
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b69dc7194e2c..b0bac2a66eff 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+
+ bpf_prog->aux->ksym.fp_start = prog - temp;
+
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@@ -2736,6 +2739,8 @@ emit_jmp:
pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
+ bpf_prog->aux->ksym.fp_end = prog - temp;
+
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
@@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ if (im)
+ im->ksym.fp_start = prog - (u8 *)rw_image;
+
if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
+
EMIT1(0xC9); /* leave */
+ if (im)
+ im->ksym.fp_end = prog - (u8 *)rw_image;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
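
Schematically, the two offsets recorded above bracket the region of the JITed image where rbp-based unwinding is valid (illustrative layout; the trampoline path records fp_start right after "mov rbp, rsp", the program path after the full prologue):

/*
 *	push rbp
 *	mov  rbp, rsp
 *	...rest of prologue...
 * fp_start: rbp-based frame is walkable from here
 *	...program body...
 *	leave
 * fp_end: past this point rbp belongs to the caller again
 *	ret
 */
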
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7a501e73d880..1abc7ed2990e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2019,13 +2019,14 @@ out:
else
btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space, true);
- extent_changeset_free(data_reserved);
out_noreserve:
if (only_release_metadata)
btrfs_check_nocow_unlock(inode);
sb_end_pagefault(inode->vfs_inode.i_sb);
+ extent_changeset_free(data_reserved);
+
if (ret < 0)
return vmf_error(ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c4bee47829ed..317db7d10a21 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -256,6 +256,7 @@ static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off
if (ret < 0) {
btrfs_err_rl(fs_info, "failed to lookup extent item for logical %llu: %d",
logical, ret);
+ btrfs_release_path(&path);
return;
}
eb = path.nodes[0];
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9e2b53e90dcb..d9d8d9968a58 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1243,14 +1243,7 @@ out:
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_end_transaction(trans);
-
- /*
- * At this point we either failed at allocating prealloc, or we
- * succeeded and passed the ownership to it to add_qgroup_rb(). In any
- * case, this needs to be NULL or there is something wrong.
- */
- ASSERT(prealloc == NULL);
-
+ kfree(prealloc);
return ret;
}
@@ -1682,12 +1675,7 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
- /*
- * At this point we either failed at allocating prealloc, or we
- * succeeded and passed the ownership to it to add_qgroup_rb(). In any
- * case, this needs to be NULL or there is something wrong.
- */
- ASSERT(prealloc == NULL);
+ kfree(prealloc);
return ret;
}
@@ -3279,7 +3267,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
- struct btrfs_qgroup *prealloc = NULL;
+ struct btrfs_qgroup *prealloc;
struct btrfs_qgroup_list **qlist_prealloc = NULL;
bool free_inherit = false;
bool need_rescan = false;
@@ -3520,14 +3508,7 @@ out:
}
if (free_inherit)
kfree(inherit);
-
- /*
- * At this point we either failed at allocating prealloc, or we
- * succeeded and passed the ownership to it to add_qgroup_rb(). In any
- * case, this needs to be NULL or there is something wrong.
- */
- ASSERT(prealloc == NULL);
-
+ kfree(prealloc);
return ret;
}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 05cfda8af422..e9124605974b 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -187,7 +187,6 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
test_err("couldn't find backref %d", ret);
- btrfs_free_path(path);
return ret;
}
btrfs_del_item(&trans, root, path);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fff37c8d96a4..31edc93a383e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5865,14 +5865,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_inode *curr_inode = start_inode;
int ret = 0;
- /*
- * If we are logging a new name, as part of a link or rename operation,
- * don't bother logging new dentries, as we just want to log the names
- * of an inode and that any new parents exist.
- */
- if (ctx->logging_new_name)
- return 0;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -6051,6 +6043,33 @@ static int conflicting_inode_is_dir(struct btrfs_root *root, u64 ino,
return ret;
}
+static bool can_log_conflicting_inode(const struct btrfs_trans_handle *trans,
+ const struct btrfs_inode *inode)
+{
+ if (!S_ISDIR(inode->vfs_inode.i_mode))
+ return true;
+
+ if (inode->last_unlink_trans < trans->transid)
+ return true;
+
+ /*
+ * If this is a directory and its unlink_trans is not from a past
+ * transaction then we must fall back to a transaction commit in order
+ * to avoid getting a directory with 2 hard links after log replay.
+ *
+ * This happens if a directory A is renamed, moved from one parent
+ * directory to another one, a new file is created in the old parent
+ * directory with the old name of our directory A, the new file is
+ * fsynced, then the new file is moved to some other parent directory
+ * and fsynced again. This results in a log tree where we
+ * logged that directory A existed, with the INODE_REF item for the
+ * new location but without having logged its old parent inode, so
+ * that on log replay we add a new link for the new location but the
+ * old link remains, resulting in a link count of 2.
+ */
+ return false;
+}
+
static int add_conflicting_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -6154,6 +6173,11 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
return 0;
}
+ if (!can_log_conflicting_inode(trans, inode)) {
+ btrfs_add_delayed_iput(inode);
+ return BTRFS_LOG_FORCE_COMMIT;
+ }
+
btrfs_add_delayed_iput(inode);
ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS);
@@ -6218,6 +6242,12 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
break;
}
+ if (!can_log_conflicting_inode(trans, inode)) {
+ btrfs_add_delayed_iput(inode);
+ ret = BTRFS_LOG_FORCE_COMMIT;
+ break;
+ }
+
/*
* Always log the directory, we cannot make this
* conditional on need_log_inode() because the directory
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ae1742a35e76..13c514684cfb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7128,6 +7128,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
fs_devices->seeding = true;
fs_devices->opened = 1;
+ list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list);
return fs_devices;
}
diff --git a/fs/file_attr.c b/fs/file_attr.c
index 4c4916632f11..13cdb31a3e94 100644
--- a/fs/file_attr.c
+++ b/fs/file_attr.c
@@ -2,6 +2,7 @@
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/fscrypt.h>
+#include <linux/fsnotify.h>
#include <linux/fileattr.h>
#include <linux/export.h>
#include <linux/syscalls.h>
@@ -298,6 +299,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
err = inode->i_op->fileattr_set(idmap, dentry, fa);
if (err)
goto out;
+ fsnotify_xattr(dentry);
}
out:
diff --git a/fs/libfs.c b/fs/libfs.c
index 9264523be85c..591eb649ebba 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -346,22 +346,22 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
* User space expects the directory offset value of the replaced
* (new) directory entry to be unchanged after a rename.
*
- * Returns zero on success, a negative errno value on failure.
+ * Caller must have grabbed a slot for new_dentry in the maple_tree
+ * associated with new_dir, even if new_dentry is negative.
*/
-int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
{
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
long new_offset = dentry2offset(new_dentry);
- simple_offset_remove(old_ctx, old_dentry);
+ if (WARN_ON(!new_offset))
+ return;
- if (new_offset) {
- offset_set(new_dentry, 0);
- return simple_offset_replace(new_ctx, old_dentry, new_offset);
- }
- return simple_offset_add(new_ctx, old_dentry);
+ simple_offset_remove(old_ctx, old_dentry);
+ offset_set(new_dentry, 0);
+ WARN_ON(simple_offset_replace(new_ctx, old_dentry, new_offset));
}
/**
@@ -388,31 +388,23 @@ int simple_offset_rename_exchange(struct inode *old_dir,
long new_index = dentry2offset(new_dentry);
int ret;
- simple_offset_remove(old_ctx, old_dentry);
- simple_offset_remove(new_ctx, new_dentry);
+ if (WARN_ON(!old_index || !new_index))
+ return -EINVAL;
- ret = simple_offset_replace(new_ctx, old_dentry, new_index);
- if (ret)
- goto out_restore;
+ ret = mtree_store(&new_ctx->mt, new_index, old_dentry, GFP_KERNEL);
+ if (WARN_ON(ret))
+ return ret;
- ret = simple_offset_replace(old_ctx, new_dentry, old_index);
- if (ret) {
- simple_offset_remove(new_ctx, old_dentry);
- goto out_restore;
+ ret = mtree_store(&old_ctx->mt, old_index, new_dentry, GFP_KERNEL);
+ if (WARN_ON(ret)) {
+ mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
+ return ret;
}
- ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
- if (ret) {
- simple_offset_remove(new_ctx, old_dentry);
- simple_offset_remove(old_ctx, new_dentry);
- goto out_restore;
- }
+ offset_set(old_dentry, new_index);
+ offset_set(new_dentry, old_index);
+ simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
return 0;
-
-out_restore:
- (void)simple_offset_replace(old_ctx, old_dentry, old_index);
- (void)simple_offset_replace(new_ctx, new_dentry, new_index);
- return ret;
}
/**
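
In caller form, the new simple_offset_rename() contract reads roughly like this (a sketch mirroring the shmem_rename2() hunk later in this diff, where the destination slot is reserved with simple_offset_add() up front):

/* reserve a destination slot first, even for a negative dentry */
error = simple_offset_add(new_dir->i_op->get_offset_ctx(new_dir),
			  new_dentry);
if (error == -EBUSY)
	;	/* replace case: new_dentry already owns a slot */
else if (error)
	return error;

/* cannot fail anymore; misuse WARNs instead of returning an error */
simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
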
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index d27ff5e5f165..71bd44e5ab6d 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -270,8 +270,15 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
/*
* Include parent/name in notification either if some notification
* groups require parent info or the parent is interested in this event.
+ * The parent's interest in ACCESS/MODIFY events does not apply to special
+ * files, where read/write are not on the filesystem of the parent and
+ * events can provide an undesirable side-channel for information
+ * exfiltration.
*/
- parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
+ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS &&
+ !(data_type == FSNOTIFY_EVENT_PATH &&
+ d_is_special(dentry) &&
+ (mask & (FS_ACCESS | FS_MODIFY)));
if (parent_needed || parent_interested) {
/* When notifying parent, child should be passed as data */
WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));
diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h
index 384b19177e1c..ee4c2726771a 100644
--- a/fs/smb/common/smbdirect/smbdirect_socket.h
+++ b/fs/smb/common/smbdirect/smbdirect_socket.h
@@ -133,6 +133,14 @@ struct smbdirect_socket {
struct smbdirect_socket_parameters parameters;
/*
+ * The state for connect/negotiation
+ */
+ struct {
+ spinlock_t lock;
+ struct work_struct work;
+ } connect;
+
+ /*
* The state for keepalive and timeout handling
*/
struct {
@@ -353,6 +361,10 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
disable_work_sync(&sc->disconnect_work);
+ spin_lock_init(&sc->connect.lock);
+ INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work);
+ disable_work_sync(&sc->connect.work);
+
INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
disable_work_sync(&sc->idle.immediate_work);
INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 1c181ef99929..7d880ff34402 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -325,8 +325,10 @@ struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
sess = ksmbd_session_lookup(conn, id);
if (!sess && conn->binding)
sess = ksmbd_session_lookup_slowpath(id);
- if (sess && sess->state != SMB2_SESSION_VALID)
+ if (sess && sess->state != SMB2_SESSION_VALID) {
+ ksmbd_user_session_put(sess);
sess = NULL;
+ }
return sess;
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 27f87a13f20a..8aa483800014 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2363,7 +2363,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
int rc = 0;
unsigned int next = 0;
- if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
+ if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + 1 +
le16_to_cpu(eabuf->EaValueLength))
return -EINVAL;
@@ -2440,7 +2440,7 @@ next:
break;
}
- if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
+ if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + 1 +
le16_to_cpu(eabuf->EaValueLength)) {
rc = -EINVAL;
break;
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index 5aa7a66334d9..05598d994a68 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -1307,9 +1307,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
granted |= le32_to_cpu(ace->access_req);
ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
}
-
- if (!pdacl->num_aces)
- granted = GENERIC_ALL_FLAGS;
}
if (!uid)
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 4e7ab8d9314f..f585359684d4 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -242,6 +242,7 @@ static void smb_direct_disconnect_rdma_work(struct work_struct *work)
* disable[_delayed]_work_sync()
*/
disable_work(&sc->disconnect_work);
+ disable_work(&sc->connect.work);
disable_work(&sc->recv_io.posted.refill_work);
disable_delayed_work(&sc->idle.timer_work);
disable_work(&sc->idle.immediate_work);
@@ -297,6 +298,7 @@ smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc)
* not queued again but here we don't block and avoid
* disable[_delayed]_work_sync()
*/
+ disable_work(&sc->connect.work);
disable_work(&sc->recv_io.posted.refill_work);
disable_work(&sc->idle.immediate_work);
disable_delayed_work(&sc->idle.timer_work);
@@ -467,6 +469,7 @@ static void free_transport(struct smb_direct_transport *t)
*/
smb_direct_disconnect_wake_up_all(sc);
+ disable_work_sync(&sc->connect.work);
disable_work_sync(&sc->recv_io.posted.refill_work);
disable_delayed_work_sync(&sc->idle.timer_work);
disable_work_sync(&sc->idle.immediate_work);
@@ -635,28 +638,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
switch (sc->recv_io.expected) {
case SMBDIRECT_EXPECT_NEGOTIATE_REQ:
- if (wc->byte_len < sizeof(struct smbdirect_negotiate_req)) {
- put_recvmsg(sc, recvmsg);
- smb_direct_disconnect_rdma_connection(sc);
- return;
- }
- sc->recv_io.reassembly.full_packet_received = true;
- /*
- * Some drivers (at least mlx5_ib) might post a
- * recv completion before RDMA_CM_EVENT_ESTABLISHED,
- * we need to adjust our expectation in that case.
- */
- if (!sc->first_error && sc->status == SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)
- sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
- if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) {
- put_recvmsg(sc, recvmsg);
- smb_direct_disconnect_rdma_connection(sc);
- return;
- }
- sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING;
- enqueue_reassembly(sc, recvmsg, 0);
- wake_up(&sc->status_wait);
- return;
+ /* see smb_direct_negotiate_recv_done */
+ break;
case SMBDIRECT_EXPECT_DATA_TRANSFER: {
struct smbdirect_data_transfer *data_transfer =
(struct smbdirect_data_transfer *)recvmsg->packet;
@@ -742,6 +725,126 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
smb_direct_disconnect_rdma_connection(sc);
}
+static void smb_direct_negotiate_recv_work(struct work_struct *work);
+
+static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smbdirect_recv_io *recv_io =
+ container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe);
+ struct smbdirect_socket *sc = recv_io->socket;
+ unsigned long flags;
+
+ /*
+ * reset the common recv_done for later reuse.
+ */
+ recv_io->cqe.done = recv_done;
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
+ put_recvmsg(sc, recv_io);
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_err("Negotiate Recv error. status='%s (%d)' opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+ smb_direct_disconnect_rdma_connection(sc);
+ }
+ return;
+ }
+
+ ksmbd_debug(RDMA, "Negotiate Recv completed. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+
+ ib_dma_sync_single_for_cpu(sc->ib.dev,
+ recv_io->sge.addr,
+ recv_io->sge.length,
+ DMA_FROM_DEVICE);
+
+ /*
+ * This is an internal error!
+ */
+ if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) {
+ put_recvmsg(sc, recv_io);
+ smb_direct_disconnect_rdma_connection(sc);
+ return;
+ }
+
+ /*
+ * Don't reset the timer to the keepalive interval here;
+ * this will be done in smb_direct_negotiate_recv_work().
+ */
+
+ /*
+ * Only remember the recv_io if it has enough bytes,
+ * this gives smb_direct_negotiate_recv_work enough
+ * information in order to disconnect if it was not
+ * valid.
+ */
+ sc->recv_io.reassembly.full_packet_received = true;
+ if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req))
+ enqueue_reassembly(sc, recv_io, 0);
+ else
+ put_recvmsg(sc, recv_io);
+
+ /*
+ * Some drivers (at least mlx5_ib and irdma in RoCE mode)
+ * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED,
+ * we need to adjust our expectation in that case.
+ *
+ * So we defer further processing of the negotiation
+ * to smb_direct_negotiate_recv_work().
+ *
+ * If we are already in SMBDIRECT_SOCKET_NEGOTIATE_NEEDED
+ * we queue the work directly otherwise
+ * smb_direct_cm_handler() will do it, when
+ * RDMA_CM_EVENT_ESTABLISHED arrived.
+ */
+ spin_lock_irqsave(&sc->connect.lock, flags);
+ if (!sc->first_error) {
+ INIT_WORK(&sc->connect.work, smb_direct_negotiate_recv_work);
+ if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)
+ queue_work(sc->workqueue, &sc->connect.work);
+ }
+ spin_unlock_irqrestore(&sc->connect.lock, flags);
+}
+
+static void smb_direct_negotiate_recv_work(struct work_struct *work)
+{
+ struct smbdirect_socket *sc =
+ container_of(work, struct smbdirect_socket, connect.work);
+ const struct smbdirect_socket_parameters *sp = &sc->parameters;
+ struct smbdirect_recv_io *recv_io;
+
+ if (sc->first_error)
+ return;
+
+ ksmbd_debug(RDMA, "Negotiate Recv Work running\n");
+
+ /*
+ * Reset timer to the keepalive interval in
+ * order to trigger our next keepalive message.
+ */
+ sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE;
+ mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
+ msecs_to_jiffies(sp->keepalive_interval_msec));
+
+ /*
+ * If smb_direct_negotiate_recv_done() detected an
+ * invalid request we want to disconnect.
+ */
+ recv_io = get_first_reassembly(sc);
+ if (!recv_io) {
+ smb_direct_disconnect_rdma_connection(sc);
+ return;
+ }
+
+ if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) {
+ smb_direct_disconnect_rdma_connection(sc);
+ return;
+ }
+ sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING;
+ wake_up(&sc->status_wait);
+}
+
static int smb_direct_post_recv(struct smbdirect_socket *sc,
struct smbdirect_recv_io *recvmsg)
{
@@ -758,7 +861,6 @@ static int smb_direct_post_recv(struct smbdirect_socket *sc,
return ret;
recvmsg->sge.length = sp->max_recv_size;
recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey;
- recvmsg->cqe.done = recv_done;
wr.wr_cqe = &recvmsg->cqe;
wr.next = NULL;
@@ -1732,6 +1834,7 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
struct smbdirect_socket *sc = cm_id->context;
+ unsigned long flags;
ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
cm_id, rdma_event_msg(event->event), event->event);
@@ -1739,18 +1842,27 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED: {
/*
- * Some drivers (at least mlx5_ib) might post a
- * recv completion before RDMA_CM_EVENT_ESTABLISHED,
+ * Some drivers (at least mlx5_ib and irdma in RoCE mode)
+ * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED,
* we need to adjust our expectation in that case.
*
- * As we already started the negotiation, we just
- * ignore RDMA_CM_EVENT_ESTABLISHED here.
+ * If smb_direct_negotiate_recv_done() was called first,
+ * it initialized sc->connect.work but left it for us to
+ * queue, so that we have moved to
+ * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED before
+ * smb_direct_negotiate_recv_work() runs.
+ *
+ * If smb_direct_negotiate_recv_done() hasn't happened
+ * yet, sc->connect.work is still disabled and
+ * queue_work() is a no-op.
*/
- if (!sc->first_error && sc->status > SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)
- break;
if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING))
break;
sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
+ spin_lock_irqsave(&sc->connect.lock, flags);
+ if (!sc->first_error)
+ queue_work(sc->workqueue, &sc->connect.work);
+ spin_unlock_irqrestore(&sc->connect.lock, flags);
wake_up(&sc->status_wait);
break;
}
@@ -1921,6 +2033,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc)
recvmsg = get_free_recvmsg(sc);
if (!recvmsg)
return -ENOMEM;
+ recvmsg->cqe.done = smb_direct_negotiate_recv_done;
ret = smb_direct_post_recv(sc, recvmsg);
if (ret) {
@@ -2339,6 +2452,7 @@ respond:
static int smb_direct_connect(struct smbdirect_socket *sc)
{
+ struct smbdirect_recv_io *recv_io;
int ret;
ret = smb_direct_init_params(sc);
@@ -2353,6 +2467,9 @@ static int smb_direct_connect(struct smbdirect_socket *sc)
return ret;
}
+ list_for_each_entry(recv_io, &sc->recv_io.free.list, list)
+ recv_io->cqe.done = recv_done;
+
ret = smb_direct_create_qpair(sc);
if (ret) {
pr_err("Can't accept RDMA client: %d\n", ret);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 98b0eb966d91..f891344bd76b 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -702,7 +702,7 @@ retry:
rd.old_parent = NULL;
rd.new_parent = new_path.dentry;
rd.flags = flags;
- rd.delegated_inode = NULL,
+ rd.delegated_inode = NULL;
err = start_renaming_dentry(&rd, lookup_flags, old_child, &new_last);
if (err)
goto out_drop_write;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6498be4c44f8..e5be698256d1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1283,6 +1283,8 @@ struct bpf_ksym {
struct list_head lnode;
struct latch_tree_node tnode;
bool prog;
+ u32 fp_start;
+ u32 fp_end;
};
enum bpf_tramp_prog_type {
@@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
+bool bpf_has_frame_pointer(unsigned long ip);
int bpf_jit_charge_modmem(u32 size);
void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d..f5c9cf28c4dc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3247,7 +3247,7 @@ struct offset_ctx {
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
-int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
+void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c8ae6ab31651..1b9b18e5b03c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
NULL;
}
+bool bpf_has_frame_pointer(unsigned long ip)
+{
+ struct bpf_ksym *ksym;
+ unsigned long offset;
+
+ guard(rcu)();
+
+ ksym = bpf_ksym_find(ip);
+ if (!ksym || !ksym->fp_start || !ksym->fp_end)
+ return false;
+
+ offset = ip - ksym->start;
+
+ return offset >= ksym->fp_start && offset < ksym->fp_end;
+}
+
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
const struct exception_table_entry *e = NULL;
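
Worked through with made-up numbers (illustration only):

/*
 * Assume ksym.start = S, and the JIT recorded fp_start = 0x10 (offset
 * just past the frame-setting prologue) and fp_end = 0x154 (offset
 * just past "leave"):
 *
 *   ip = S + 0x08  -> false: prologue, rbp not yet set up
 *   ip = S + 0x80  -> true:  body, frame-pointer layout holds
 *   ip = S + 0x158 -> false: epilogue done, rbp already restored
 */
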
diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c
index 4dd7ef7c145c..cd500248abd9 100644
--- a/kernel/bpf/dmabuf_iter.c
+++ b/kernel/bpf/dmabuf_iter.c
@@ -6,10 +6,33 @@
#include <linux/kernel.h>
#include <linux/seq_file.h>
+struct dmabuf_iter_priv {
+ /*
+ * If this pointer is non-NULL, the buffer's refcount is elevated to
+ * prevent destruction between stop/start. If reading is not resumed and
+ * start is never called again, then dmabuf_iter_seq_fini drops the
+ * reference when the iterator is released.
+ */
+ struct dma_buf *dmabuf;
+};
+
static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
- if (*pos)
- return NULL;
+ struct dmabuf_iter_priv *p = seq->private;
+
+ if (*pos) {
+ struct dma_buf *dmabuf = p->dmabuf;
+
+ if (!dmabuf)
+ return NULL;
+
+ /*
+ * Always resume from where we stopped, regardless of the value
+ * of pos.
+ */
+ p->dmabuf = NULL;
+ return dmabuf;
+ }
return dma_buf_iter_begin();
}
@@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
{
struct dma_buf *dmabuf = v;
- if (dmabuf)
- dma_buf_put(dmabuf);
+ if (dmabuf) {
+ struct dmabuf_iter_priv *p = seq->private;
+
+ p->dmabuf = dmabuf;
+ }
}
static const struct seq_operations dmabuf_iter_seq_ops = {
@@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
seq_puts(seq, "dmabuf iter\n");
}
+static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
+{
+ struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
+
+ p->dmabuf = NULL;
+ return 0;
+}
+
+static void dmabuf_iter_seq_fini(void *priv)
+{
+ struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
+
+ if (p->dmabuf)
+ dma_buf_put(p->dmabuf);
+}
+
static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
- .init_seq_private = NULL,
- .fini_seq_private = NULL,
- .seq_priv_size = 0,
+ .init_seq_private = dmabuf_iter_seq_init,
+ .fini_seq_private = dmabuf_iter_seq_fini,
+ .seq_priv_size = sizeof(struct dmabuf_iter_priv),
};
static struct bpf_iter_reg bpf_dmabuf_reg_info = {
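
The lifecycle the private cursor enables, spelled out (my sketch of the resulting seq_file call sequence, not from the patch):

/*
 * read() #1: start(*pos == 0) -> dma_buf_iter_begin()
 *            show()/next() walk the buffers, each holding a reference
 *            stop(v != NULL) -> stash v in p->dmabuf, keep the reference
 * read() #2: start(*pos > 0) -> return the stashed dmabuf, clear it
 * close():   dmabuf_iter_seq_fini() -> dma_buf_put() on anything still
 *            stashed, so an abandoned iterator cannot leak a reference
 */
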
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a198e40c799b..150e5871e66f 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -71,7 +71,6 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
struct llist_head *lhead;
struct css_rstat_cpu *rstatc;
- struct css_rstat_cpu __percpu *rstatc_pcpu;
struct llist_node *self;
/*
@@ -104,18 +103,22 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
/*
* This function can be re-entered by irqs and nmis for the same cgroup
* and may try to insert the same per-cpu lnode into the llist. Note
- * that llist_add() does not protect against such scenarios.
+ * that llist_add() does not protect against such scenarios. In addition
+ * this same per-cpu lnode can be modified through init_llist_node()
+ * from css_rstat_flush() running on a different CPU.
*
* To protect against such stacked contexts of irqs/nmis, we use the
* fact that lnode points to itself when not on a list and then use
- * this_cpu_cmpxchg() to atomically set to NULL to select the winner
+ * try_cmpxchg() to atomically set to NULL to select the winner
* which will call llist_add(). The losers can assume the insertion is
* successful and the winner will eventually add the per-cpu lnode to
* the llist.
+ *
+ * Please note that we cannot use this_cpu_cmpxchg() here as on some
+ * archs it is not safe against modifications from multiple CPUs.
*/
self = &rstatc->lnode;
- rstatc_pcpu = css->rstat_cpu;
- if (this_cpu_cmpxchg(rstatc_pcpu->lnode.next, self, NULL) != self)
+ if (!try_cmpxchg(&rstatc->lnode.next, &self, NULL))
return;
lhead = ss_lhead_cpu(css->ss, cpu);
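
Why the local-only primitive is not enough, as a two-CPU interleaving (my illustration of the comment above):

/*
 * CPU0: css_rstat_updated()           CPU1: css_rstat_flush()
 * ----------------------------        ------------------------------
 * self = &rstatc->lnode               llist_del_all(lhead)
 * try_cmpxchg(&rstatc->lnode.next,    init_llist_node(&rstatc->lnode)
 *             &self, NULL)              (writes lnode.next = &lnode)
 *
 * Both sides write lnode.next, so the updater needs a cmpxchg that is
 * atomic against other CPUs; on some architectures this_cpu_cmpxchg()
 * is only atomic against the local CPU.
 */
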
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 05f5a49e9649..94164f2dec6d 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -41,6 +41,13 @@ static bool scx_init_task_enabled;
static bool scx_switching_all;
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
+/*
+ * Tracks whether scx_enable() called scx_bypass(true). Used to balance bypass
+ * depth on enable failure. Will be removed when bypass depth is moved into the
+ * sched instance.
+ */
+static bool scx_bypassed_for_enable;
+
static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
@@ -975,6 +982,30 @@ static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p)
__scx_add_event(sch, SCX_EV_REFILL_SLICE_DFL, 1);
}
+static void local_dsq_post_enq(struct scx_dispatch_q *dsq, struct task_struct *p,
+ u64 enq_flags)
+{
+ struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
+ bool preempt = false;
+
+ /*
+ * If @rq is in balance, the CPU is already vacant and looking for the
+ * next task to run. No need to preempt or trigger resched after moving
+ * @p into its local DSQ.
+ */
+ if (rq->scx.flags & SCX_RQ_IN_BALANCE)
+ return;
+
+ if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr &&
+ rq->curr->sched_class == &ext_sched_class) {
+ rq->curr->scx.slice = 0;
+ preempt = true;
+ }
+
+ if (preempt || sched_class_above(&ext_sched_class, rq->curr->sched_class))
+ resched_curr(rq);
+}
+
static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
struct task_struct *p, u64 enq_flags)
{
@@ -1086,22 +1117,10 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
if (enq_flags & SCX_ENQ_CLEAR_OPSS)
atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
- if (is_local) {
- struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
- bool preempt = false;
-
- if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr &&
- rq->curr->sched_class == &ext_sched_class) {
- rq->curr->scx.slice = 0;
- preempt = true;
- }
-
- if (preempt || sched_class_above(&ext_sched_class,
- rq->curr->sched_class))
- resched_curr(rq);
- } else {
+ if (is_local)
+ local_dsq_post_enq(dsq, p, enq_flags);
+ else
raw_spin_unlock(&dsq->lock);
- }
}
static void task_unlink_from_dsq(struct task_struct *p,
@@ -1625,6 +1644,8 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
dsq_mod_nr(dst_dsq, 1);
p->scx.dsq = dst_dsq;
+
+ local_dsq_post_enq(dst_dsq, p, enq_flags);
}
/**
@@ -2402,7 +2423,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
* ops.enqueue() that @p is the only one available for this cpu,
* which should trigger an explicit follow-up scheduling event.
*/
- if (sched_class_above(&ext_sched_class, next->sched_class)) {
+ if (next && sched_class_above(&ext_sched_class, next->sched_class)) {
WARN_ON_ONCE(!(sch->ops.flags & SCX_OPS_ENQ_LAST));
do_enqueue_task(rq, p, SCX_ENQ_LAST, -1);
} else {
@@ -2425,7 +2446,7 @@ static struct task_struct *
do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
{
struct task_struct *prev = rq->curr;
- bool keep_prev, kick_idle = false;
+ bool keep_prev;
struct task_struct *p;
/* see kick_cpus_irq_workfn() */
@@ -2467,12 +2488,8 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
refill_task_slice_dfl(rcu_dereference_sched(scx_root), p);
} else {
p = first_local_task(rq);
- if (!p) {
- if (kick_idle)
- scx_kick_cpu(rcu_dereference_sched(scx_root),
- cpu_of(rq), SCX_KICK_IDLE);
+ if (!p)
return NULL;
- }
if (unlikely(!p->scx.slice)) {
struct scx_sched *sch = rcu_dereference_sched(scx_root);
@@ -3575,7 +3592,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
int node;
irq_work_sync(&sch->error_irq_work);
- kthread_stop(sch->helper->task);
+ kthread_destroy_worker(sch->helper);
free_percpu(sch->pcpu);
@@ -4318,6 +4335,11 @@ static void scx_disable_workfn(struct kthread_work *work)
scx_dsp_max_batch = 0;
free_kick_syncs();
+ if (scx_bypassed_for_enable) {
+ scx_bypassed_for_enable = false;
+ scx_bypass(false);
+ }
+
mutex_unlock(&scx_enable_mutex);
WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
@@ -4786,7 +4808,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
return sch;
err_stop_helper:
- kthread_stop(sch->helper->task);
+ kthread_destroy_worker(sch->helper);
err_free_pcpu:
free_percpu(sch->pcpu);
err_free_gdsqs:
@@ -4970,6 +4992,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
* Init in bypass mode to guarantee forward progress.
*/
scx_bypass(true);
+ scx_bypassed_for_enable = true;
for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
if (((void (**)(void))ops)[i])
@@ -5067,6 +5090,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
+ scx_bypassed_for_enable = false;
scx_bypass(false);
if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d57727abaade..fe28d86f7c35 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -965,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_d_path_btf_ids[0],
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.allowed = bpf_d_path_allowed,
};
diff --git a/mm/shmem.c b/mm/shmem.c
index b329b5302c48..ec6c01378e9d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4019,22 +4019,10 @@ static int shmem_whiteout(struct mnt_idmap *idmap,
whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
if (!whiteout)
return -ENOMEM;
-
error = shmem_mknod(idmap, old_dir, whiteout,
S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
dput(whiteout);
- if (error)
- return error;
-
- /*
- * Cheat and hash the whiteout while the old dentry is still in
- * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
- *
- * d_lookup() will consistently find one of them at this point,
- * not sure which one, but that isn't even important.
- */
- d_rehash(whiteout);
- return 0;
+ return error;
}
/*
@@ -4050,6 +4038,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
{
struct inode *inode = d_inode(old_dentry);
int they_are_dirs = S_ISDIR(inode->i_mode);
+ bool had_offset = false;
int error;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
@@ -4062,16 +4051,23 @@ static int shmem_rename2(struct mnt_idmap *idmap,
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
+ error = simple_offset_add(shmem_get_offset_ctx(new_dir), new_dentry);
+ if (error == -EBUSY)
+ had_offset = true;
+ else if (unlikely(error))
+ return error;
+
if (flags & RENAME_WHITEOUT) {
error = shmem_whiteout(idmap, old_dir, old_dentry);
- if (error)
+ if (error) {
+ if (!had_offset)
+ simple_offset_remove(shmem_get_offset_ctx(new_dir),
+ new_dentry);
return error;
+ }
}
- error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
- if (error)
- return error;
-
+ simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
if (d_really_is_positive(new_dentry)) {
(void) shmem_unlink(new_dir, new_dentry);
if (they_are_dirs) {
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 325addf83cc6..277ef504bc26 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -22,10 +22,10 @@ config SMC_DIAG
config SMC_HS_CTRL_BPF
bool "Generic eBPF hook for SMC handshake flow"
- depends on SMC && BPF_SYSCALL
+ depends on SMC && BPF_JIT && BPF_SYSCALL
default y
help
SMC_HS_CTRL_BPF enables support to register a generic eBPF hook for SMC
handshake flow, which offers much greater flexibility in modifying the behavior
of the SMC protocol stack compared to a complete kernel-based approach. Select
- this option if you want filtring the handshake process via eBPF programs.
\ No newline at end of file
+ this option if you want to filter the handshake process via eBPF programs.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 586d1b2595d1..5442073a2e42 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -224,6 +224,8 @@ endif
$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
$(QUIET_CLANG)$(CLANG) \
+ -Wno-microsoft-anon-tag \
+ -fms-extensions \
-I$(or $(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_BOOTSTRAP_INCLUDE) \
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3dc8a8078815..f4dfd23148a5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8484,7 +8484,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
struct bpf_object *obj = ctx;
const struct btf_type *t;
struct extern_desc *ext;
- char *res;
+ const char *res;
res = strstr(sym_name, ".llvm.");
if (sym_type == 'd' && res)
@@ -11818,7 +11818,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
*
* [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
*/
- char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
+ char sym_trim[256], *psym_trim = sym_trim;
+ const char *sym_sfx;
if (!(sym_sfx = strstr(sym_name, ".llvm.")))
return 0;
@@ -12401,7 +12402,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
if (!search_paths[i])
continue;
for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
- char *next_path;
+ const char *next_path;
int seg_len;
if (s[0] == ':')
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b7030a6e2e76..4aa60e83ff19 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -437,6 +437,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include) \
-std=gnu11 \
-fno-strict-aliasing \
+ -Wno-microsoft-anon-tag \
+ -fms-extensions \
-Wno-compare-distinct-pointer-types \
-Wno-initializer-overrides \
#
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index ccc768592e66..1a2a2f1abf03 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid)
return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
}
+static inline long syscall_close(int fd)
+{
+ return syscall(__NR_close_range,
+ (unsigned int)fd,
+ (unsigned int)fd,
+ 0u);
+}
+
static int trigger_fstat_events(pid_t pid)
{
int sockfd = -1, procfd = -1, devfd = -1;
@@ -104,18 +112,34 @@ out_close:
/* sys_close no longer triggers filp_close, but we can
* call sys_close_range instead which still does
*/
-#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+ syscall_close(pipefd[0]);
+ syscall_close(pipefd[1]);
+ syscall_close(sockfd);
+ syscall_close(procfd);
+ syscall_close(devfd);
+ syscall_close(localfd);
+ syscall_close(indicatorfd);
+ return ret;
+}
- close(pipefd[0]);
- close(pipefd[1]);
- close(sockfd);
- close(procfd);
- close(devfd);
- close(localfd);
- close(indicatorfd);
+static void attach_and_load(struct test_d_path **skel)
+{
+ int err;
-#undef close
- return ret;
+ *skel = test_d_path__open_and_load();
+ if (CHECK(!*skel, "setup", "d_path skeleton failed\n"))
+ goto cleanup;
+
+ err = test_d_path__attach(*skel);
+ if (CHECK(err, "setup", "attach failed: %d\n", err))
+ goto cleanup;
+
+ (*skel)->bss->my_pid = getpid();
+ return;
+
+cleanup:
+ test_d_path__destroy(*skel);
+ *skel = NULL;
}
static void test_d_path_basic(void)
@@ -124,16 +148,11 @@ static void test_d_path_basic(void)
struct test_d_path *skel;
int err;
- skel = test_d_path__open_and_load();
- if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
- goto cleanup;
-
- err = test_d_path__attach(skel);
- if (CHECK(err, "setup", "attach failed: %d\n", err))
+ attach_and_load(&skel);
+ if (!skel)
goto cleanup;
bss = skel->bss;
- bss->my_pid = getpid();
err = trigger_fstat_events(bss->my_pid);
if (err < 0)
@@ -195,6 +214,39 @@ static void test_d_path_check_types(void)
test_d_path_check_types__destroy(skel);
}
+/* Check if the verifier correctly generates code for
+ * accessing the memory modified by the d_path helper.
+ */
+static void test_d_path_mem_access(void)
+{
+ int localfd = -1;
+ char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX";
+ struct test_d_path__bss *bss;
+ struct test_d_path *skel;
+
+ attach_and_load(&skel);
+ if (!skel)
+ goto cleanup;
+
+ bss = skel->bss;
+
+ localfd = mkstemp(path_template);
+ if (CHECK(localfd < 0, "trigger", "mkstemp failed\n"))
+ goto cleanup;
+
+ if (CHECK(fallocate(localfd, 0, 0, 1024) < 0, "trigger", "fallocate failed\n"))
+ goto cleanup;
+ remove(path_template);
+
+ if (CHECK(!bss->path_match_fallocate, "check",
+ "failed to read fallocate path"))
+ goto cleanup;
+
+cleanup:
+ syscall_close(localfd);
+ test_d_path__destroy(skel);
+}
+
void test_d_path(void)
{
if (test__start_subtest("basic"))
@@ -205,4 +257,7 @@ void test_d_path(void)
if (test__start_subtest("check_alloc_mem"))
test_d_path_check_types();
+
+ if (test__start_subtest("check_mem_access"))
+ test_d_path_mem_access();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
index 6c2b0c3dbcd8..e442be9dde7e 100644
--- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -73,12 +73,10 @@ close_memfd:
return -1;
}
-static int create_sys_heap_dmabuf(void)
+static int create_sys_heap_dmabuf(size_t bytes)
{
- sysheap_test_buffer_size = 20 * getpagesize();
-
struct dma_heap_allocation_data data = {
- .len = sysheap_test_buffer_size,
+ .len = bytes,
.fd = 0,
.fd_flags = O_RDWR | O_CLOEXEC,
.heap_flags = 0,
@@ -110,7 +108,9 @@ close_sysheap_dmabuf:
static int create_test_buffers(void)
{
udmabuf = create_udmabuf();
- sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ sysheap_test_buffer_size = 20 * getpagesize();
+ sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size);
if (udmabuf < 0 || sysheap_dmabuf < 0)
return -1;
@@ -219,6 +219,26 @@ close_iter_fd:
close(iter_fd);
}
+static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[1024];
+ size_t total_bytes_read = 0;
+ ssize_t bytes_read;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0)
+ total_bytes_read += bytes_read;
+
+ ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read");
+
+ close(iter_fd);
+}
+
+
static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
{
LIBBPF_OPTS(bpf_test_run_opts, topts);
@@ -275,6 +295,23 @@ void test_dmabuf_iter(void)
subtest_dmabuf_iter_check_no_infinite_reads(skel);
if (test__start_subtest("default_iter"))
subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("lots_of_buffers")) {
+ size_t NUM_BUFS = 100;
+ int buffers[NUM_BUFS];
+ int i;
+
+ for (i = 0; i < NUM_BUFS; ++i) {
+ buffers[i] = create_sys_heap_dmabuf(getpagesize());
+ if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd"))
+ goto cleanup_bufs;
+ }
+
+ subtest_dmabuf_iter_check_lots_of_buffers(skel);
+
+cleanup_bufs:
+ for (--i; i >= 0; --i)
+ close(buffers[i]);
+ }
if (test__start_subtest("open_coded"))
subtest_dmabuf_iter_check_open_coded(skel, map_fd);
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
index 84e1f883f97b..561b2f861808 100644
--- a/tools/testing/selftests/bpf/progs/test_d_path.c
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {};
int called_stat = 0;
int called_close = 0;
+int path_match_fallocate = 0;
SEC("fentry/security_inode_getattr")
int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
@@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id)
return 0;
}
+SEC("fentry/vfs_fallocate")
+int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ int ret = 0;
+ char path_fallocate[MAX_PATH_LEN] = {};
+
+ if (pid != my_pid)
+ return 0;
+
+ ret = bpf_d_path(&file->f_path,
+ path_fallocate, MAX_PATH_LEN);
+ if (ret < 0)
+ return 0;
+
+ if (!path_fallocate[0])
+ return 0;
+
+ path_match_fallocate = 1;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c
index aa2d7d32dda9..5748d2c69903 100644
--- a/tools/testing/selftests/sched_ext/runner.c
+++ b/tools/testing/selftests/sched_ext/runner.c
@@ -46,6 +46,14 @@ static void print_test_preamble(const struct scx_test *test, bool quiet)
if (!quiet)
printf("DESCRIPTION: %s\n", test->description);
printf("OUTPUT:\n");
+
+ /*
+ * The tests may fork with the preamble buffered
+ * in the children's stdout. Flush before the test
+ * to avoid printing the message multiple times.
+ */
+ fflush(stdout);
+ fflush(stderr);
}
static const char *status_to_result(enum scx_test_status status)