From d9f514d3e6ee48c34d70d637479b4c9384832d4f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 23 Nov 2025 22:51:23 +0000 Subject: block: move around bio flagging helpers We'll need bio_flagged() earlier in bio.h for later patches, move it together with all related helpers, and mark the bio_flagged()'s bio argument as const. Signed-off-by: Pavel Begunkov Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index ad2d57908c1c..c75a9b3672aa 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -46,6 +46,21 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs) #define bio_data_dir(bio) \ (op_is_write(bio_op(bio)) ? WRITE : READ) +static inline bool bio_flagged(const struct bio *bio, unsigned int bit) +{ + return bio->bi_flags & (1U << bit); +} + +static inline void bio_set_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags |= (1U << bit); +} + +static inline void bio_clear_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags &= ~(1U << bit); +} + /* * Check whether this bio carries any data or not. A NULL bio is allowed. 
*/ @@ -225,21 +240,6 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) atomic_set(&bio->__bi_cnt, count); } -static inline bool bio_flagged(struct bio *bio, unsigned int bit) -{ - return bio->bi_flags & (1U << bit); -} - -static inline void bio_set_flag(struct bio *bio, unsigned int bit) -{ - bio->bi_flags |= (1U << bit); -} - -static inline void bio_clear_flag(struct bio *bio, unsigned int bit) -{ - bio->bi_flags &= ~(1U << bit); -} - static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); -- cgit v1.2.3 From 9637fc3bdd10c8e073f71897bd35babbd21e9b29 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Dec 2025 10:16:59 -0700 Subject: selftests: ublk: fix overflow in ublk_queue_auto_zc_fallback() The functions ublk_queue_use_zc(), ublk_queue_use_auto_zc(), and ublk_queue_auto_zc_fallback() were returning int, but performing bitwise AND on q->flags which is __u64. When a flag bit is set in the upper 32 bits (beyond INT_MAX), the result of the bitwise AND operation could overflow when cast to int, leading to incorrect boolean evaluation. For example, if UBLKS_Q_AUTO_BUF_REG_FALLBACK is 0x8000000000000000: - (u64)flags & 0x8000000000000000 = 0x8000000000000000 - Cast to int: undefined behavior / incorrect value - Used in if(): may evaluate incorrectly Fix by: 1. Changing return type from int to bool for semantic correctness 2. Using !! to explicitly convert to boolean (0 or 1) This ensures the functions return proper boolean values regardless of which bit position the flags occupy in the 64-bit field. 
Fixes: c3a6d48f86da ("selftests: ublk: remove ublk queue self-defined flags") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index fe42705c6d42..6e8f381f3481 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -390,19 +390,19 @@ static inline int ublk_completed_tgt_io(struct ublk_thread *t, return --io->tgt_ios == 0; } -static inline int ublk_queue_use_zc(const struct ublk_queue *q) +static inline bool ublk_queue_use_zc(const struct ublk_queue *q) { - return q->flags & UBLK_F_SUPPORT_ZERO_COPY; + return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY); } -static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) +static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q) { - return q->flags & UBLK_F_AUTO_BUF_REG; + return !!(q->flags & UBLK_F_AUTO_BUF_REG); } -static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q) +static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q) { - return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK; + return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK); } static inline int ublk_queue_no_buf(const struct ublk_queue *q) -- cgit v1.2.3 From 1fd4b8d7e3cf102bd01a295460d629e15152d2b3 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:00 -0700 Subject: selftests: ublk: correct last_rw map type in seq_io.bt The last_rw map is initialized with a value of 0 but later assigned the value args.sector + args.nr_sector, which has type sector_t = u64. 
bpftrace complains about the type mismatch between int64 and uint64: trace/seq_io.bt:18:3-59: ERROR: Type mismatch for @last_rw: trying to assign value of type 'uint64' when map already contains a value of type 'int64' @last_rw[$dev, str($2)] = (args.sector + args.nr_sector); Cast the initial value to uint64 so bpftrace will load the program. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/trace/seq_io.bt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt index 272ac54c9d5f..507a3ca05abf 100644 --- a/tools/testing/selftests/ublk/trace/seq_io.bt +++ b/tools/testing/selftests/ublk/trace/seq_io.bt @@ -4,7 +4,7 @@ $3: strlen($2) */ BEGIN { - @last_rw[$1, str($2)] = 0; + @last_rw[$1, str($2)] = (uint64)0; } tracepoint:block:block_rq_complete { -- cgit v1.2.3 From fe8c0182d49414740e639c1ca6b7d4b8e36b77fe Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:01 -0700 Subject: selftests: ublk: remove unused ios map in seq_io.bt The ios map populated by seq_io.bt is never read, so remove it. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/trace/seq_io.bt | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt index 507a3ca05abf..b2f60a92b118 100644 --- a/tools/testing/selftests/ublk/trace/seq_io.bt +++ b/tools/testing/selftests/ublk/trace/seq_io.bt @@ -17,7 +17,6 @@ tracepoint:block:block_rq_complete } @last_rw[$dev, str($2)] = (args.sector + args.nr_sector); } - @ios = count(); } END { -- cgit v1.2.3 From 58eec4f3fc2878de51239916953b736b674d5071 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:02 -0700 Subject: selftests: ublk: fix fio arguments in run_io_and_recover() run_io_and_recover() invokes fio with --size="${size}", but the variable size doesn't exist. Thus, the argument expands to --size=, which causes fio to exit immediately with an error without issuing any I/O. Pass the value for size as the first argument to the function. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 5 +++-- tools/testing/selftests/ublk/test_generic_04.sh | 2 +- tools/testing/selftests/ublk/test_generic_05.sh | 2 +- tools/testing/selftests/ublk/test_generic_11.sh | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 8a4dbd09feb0..6f1c042de40e 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -333,11 +333,12 @@ run_io_and_kill_daemon() run_io_and_recover() { - local action=$1 + local size=$1 + local action=$2 local state local dev_id - shift 1 + shift 2 dev_id=$(_add_ublk_dev "$@") _check_add_dev "$TID" $? 
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh index 8b533217d4a1..baf5b156193d 100755 --- a/tools/testing/selftests/ublk/test_generic_04.sh +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_recover_test() { - run_io_and_recover "kill_daemon" "$@" + run_io_and_recover 256M "kill_daemon" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 398e9e2b58e1..7b5083afc02a 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_recover_test() { - run_io_and_recover "kill_daemon" "$@" + run_io_and_recover 256M "kill_daemon" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh index a00357a5ec6b..d1f973c8c645 100755 --- a/tools/testing/selftests/ublk/test_generic_11.sh +++ b/tools/testing/selftests/ublk/test_generic_11.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_quiesce_recover() { - run_io_and_recover "quiesce_dev" "$@" + run_io_and_recover 256M "quiesce_dev" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" -- cgit v1.2.3 From 20da98a07bcbacb15fc627f6cf426a2f4d1501e5 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:03 -0700 Subject: selftests: ublk: use auto_zc for PER_IO_DAEMON tests in stress_04 stress_04 is described as "run IO and kill ublk server(zero copy)" but the --per_io_tasks tests cases don't use zero copy. Plus, one of the test cases is duplicated. Add --auto_zc to these test cases and --auto_zc_fallback to one of the duplicated ones. This matches the test cases in stress_03. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_stress_04.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 3f901db4d09d..c0c926ce0539 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -40,10 +40,10 @@ if _have_feature "AUTO_BUF_REG"; then fi if _have_feature "PER_IO_DAEMON"; then - ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks & - ublk_io_and_kill_daemon 256M -t loop -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & - ublk_io_and_kill_daemon 256M -t stripe -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & - ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks & + ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & + ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & fi wait -- cgit v1.2.3 From d8295408e0cf529be78ee4ed8b6758a9fb209feb Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:04 -0700 Subject: selftests: ublk: don't share backing files between ublk servers stress_04 is missing a wait between blocks of tests, meaning multiple ublk servers will be running in parallel using the same backing files. Add a wait after each section to ensure each backing file is in use by a single ublk server at a time. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_stress_04.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index c0c926ce0539..efa8dc33234b 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -31,12 +31,14 @@ _create_backfile 2 128M ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd & ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait if _have_feature "AUTO_BUF_REG"; then ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait fi if _have_feature "PER_IO_DAEMON"; then @@ -44,8 +46,8 @@ if _have_feature "PER_IO_DAEMON"; then ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait fi -wait _cleanup_test "stress" _show_result $TID $ERR_CODE -- cgit v1.2.3 From 52bc483763262b66e51818a82e03cad0c5bfef67 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:05 -0700 Subject: selftests: ublk: forbid multiple data copy modes The kublk mock ublk server allows multiple data copy mode arguments to be passed on the command line (--zero_copy, --get_data, and --auto_zc). 
The ublk device will be created with all the requested feature flags, however kublk will only use one of the modes to interact with request data (arbitrarily preferring auto_zc over zero_copy over get_data). To clarify the intent of the test, don't allow multiple data copy modes to be specified. --zero_copy and --auto_zc are allowed together for --auto_zc_fallback, which uses both copy modes. Don't set UBLK_F_USER_COPY for zero_copy, as it's a separate feature. Fix the test cases in test_stress_05 passing --get_data along with --zero_copy or --auto_zc. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 11 ++++++++++- tools/testing/selftests/ublk/test_stress_05.sh | 10 +++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index f8fa102a627f..4dd02cb083ba 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1613,7 +1613,7 @@ int main(int argc, char *argv[]) ctx.queue_depth = strtol(optarg, NULL, 10); break; case 'z': - ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; + ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY; break; case 'r': value = strtol(optarg, NULL, 10); @@ -1686,6 +1686,15 @@ int main(int argc, char *argv[]) return -EINVAL; } + if (!!(ctx.flags & UBLK_F_NEED_GET_DATA) + + !!(ctx.flags & UBLK_F_USER_COPY) + + (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY && !ctx.auto_zc_fallback) + + (ctx.flags & UBLK_F_AUTO_BUF_REG && !ctx.auto_zc_fallback) + + ctx.auto_zc_fallback > 1) { + fprintf(stderr, "too many data copy modes specified\n"); + return -EINVAL; + } + i = optind; while (i < argc && ctx.nr_files < MAX_BACK_FILES) { ctx.files[ctx.nr_files++] = argv[i++]; diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index 274295061042..68a194144302 100755 --- 
a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -58,17 +58,17 @@ done if _have_feature "ZERO_COPY"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & wait done fi if _have_feature "AUTO_BUF_REG"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & - ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" & + ublk_io_and_remove 8G -t null -q 4 --auto_zc -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" & wait done fi -- cgit v1.2.3 From b9f0a94c3b2e7deaff93e4c4de335e3054223ff4 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:06 -0700 Subject: selftests: ublk: add support for user copy to kublk The ublk selftests mock ublk server kublk supports every data copy mode except user copy. Add support for user copy to kublk, enabled via the --user_copy (-u) command line argument. On writes, issue pread() calls to copy the write data into the ublk_io's buffer before dispatching the write to the target implementation. On reads, issue pwrite() calls to copy read data from the ublk_io's buffer before committing the request. Copy in 2 KB chunks to provide some coverage of the offsetting logic. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/file_backed.c | 7 ++-- tools/testing/selftests/ublk/kublk.c | 53 +++++++++++++++++++++++++++--- tools/testing/selftests/ublk/kublk.h | 11 +++++++ tools/testing/selftests/ublk/stripe.c | 2 +- 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index cd9fe69ecce2..269d5f124e06 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -34,8 +34,9 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, unsigned zc = ublk_queue_use_zc(q); unsigned auto_zc = ublk_queue_use_auto_zc(q); enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc); + struct ublk_io *io = ublk_get_io(q, tag); struct io_uring_sqe *sqe[3]; - void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr; + void *addr = io->buf_addr; if (!zc || auto_zc) { ublk_io_alloc_sqes(t, sqe, 1); @@ -56,7 +57,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); @@ -68,7 +69,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); - io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, io->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); return 2; diff --git 
a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 4dd02cb083ba..185ba553686a 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -596,6 +596,38 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q, sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); } +/* Copy in pieces to test the buffer offset logic */ +#define UBLK_USER_COPY_LEN 2048 + +static void ublk_user_copy(const struct ublk_io *io, __u8 match_ublk_op) +{ + const struct ublk_queue *q = ublk_io_to_queue(io); + const struct ublksrv_io_desc *iod = ublk_get_iod(q, io->tag); + __u64 off = ublk_user_copy_offset(q->q_id, io->tag); + __u8 ublk_op = ublksrv_get_op(iod); + __u32 len = iod->nr_sectors << 9; + void *addr = io->buf_addr; + + if (ublk_op != match_ublk_op) + return; + + while (len) { + __u32 copy_len = min(len, UBLK_USER_COPY_LEN); + ssize_t copied; + + if (ublk_op == UBLK_IO_OP_WRITE) + copied = pread(q->ublk_fd, addr, copy_len, off); + else if (ublk_op == UBLK_IO_OP_READ) + copied = pwrite(q->ublk_fd, addr, copy_len, off); + else + assert(0); + assert(copied == (ssize_t)copy_len); + addr += copy_len; + off += copy_len; + len -= copy_len; + } +} + int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) { struct ublk_queue *q = ublk_io_to_queue(io); @@ -618,9 +650,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) if (io->flags & UBLKS_IO_NEED_GET_DATA) cmd_op = UBLK_U_IO_NEED_GET_DATA; - else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) + else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) { + if (ublk_queue_use_user_copy(q)) + ublk_user_copy(io, UBLK_IO_OP_READ); + cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; - else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) + } else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; if (io_uring_sq_space_left(&t->ring) < 1) @@ -649,7 +684,7 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) sqe[0]->rw_flags = 0; cmd->tag = 
io->tag; cmd->q_id = q->q_id; - if (!ublk_queue_no_buf(q)) + if (!ublk_queue_no_buf(q) && !ublk_queue_use_user_copy(q)) cmd->addr = (__u64) (uintptr_t) io->buf_addr; else cmd->addr = 0; @@ -751,6 +786,10 @@ static void ublk_handle_uring_cmd(struct ublk_thread *t, if (cqe->res == UBLK_IO_RES_OK) { assert(tag < q->q_depth); + + if (ublk_queue_use_user_copy(q)) + ublk_user_copy(io, UBLK_IO_OP_WRITE); + if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(t, q, tag); } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { @@ -1507,7 +1546,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); + printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1] [-g] [-u]\n"); printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); printf("\t[--nthreads threads] [--per_io_tasks]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); @@ -1568,6 +1607,7 @@ int main(int argc, char *argv[]) { "get_data", 1, NULL, 'g'}, { "auto_zc", 0, NULL, 0 }, { "auto_zc_fallback", 0, NULL, 0 }, + { "user_copy", 0, NULL, 'u'}, { "size", 1, NULL, 's'}, { "nthreads", 1, NULL, 0 }, { "per_io_tasks", 0, NULL, 0 }, @@ -1593,7 +1633,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gazu", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1633,6 +1673,9 @@ int main(int argc, char *argv[]) case 'g': ctx.flags |= UBLK_F_NEED_GET_DATA; break; + case 'u': + ctx.flags |= UBLK_F_USER_COPY; + break; case 's': ctx.size = strtoull(optarg, NULL, 10); break; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 6e8f381f3481..8a83b90ec603 100644 --- 
a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -208,6 +208,12 @@ static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); } +static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag) +{ + return UBLKSRV_IO_BUF_OFFSET + + ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF); +} + static inline int is_target_io(__u64 user_data) { return (user_data & (1ULL << 63)) != 0; @@ -405,6 +411,11 @@ static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q) return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK); } +static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q) +{ + return !!(q->flags & UBLK_F_USER_COPY); +} + static inline int ublk_queue_no_buf(const struct ublk_queue *q) { return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 791fa8dc1651..fd412e1f01c0 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -134,7 +134,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, struct stripe_array *s = alloc_stripe_array(conf, iod); struct ublk_io *io = ublk_get_io(q, tag); int i, extra = zc ? 2 : 0; - void *base = (zc | auto_zc) ? NULL : (void *)iod->addr; + void *base = io->buf_addr; io->private_data = s; calculate_stripe_array(conf, iod, s, base); -- cgit v1.2.3 From 63276182c51332b75293ac88f3a81d98bfca1b93 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 12 Dec 2025 10:17:07 -0700 Subject: selftests: ublk: add user copy test cases The ublk selftests cover every data copy mode except user copy. 
Add tests for user copy based on the existing test suite: - generic_14 ("basic recover function verification (user copy)") based on generic_04 and generic_05 - null_03 ("basic IO test with user copy") based on null_01 and null_02 - loop_06 ("write and verify over user copy") based on loop_01 and loop_03 - loop_07 ("mkfs & mount & umount with user copy") based on loop_02 and loop_04 - stripe_05 ("write and verify test on user copy") based on stripe_03 - stripe_06 ("mkfs & mount & umount on user copy") based on stripe_02 and stripe_04 - stress_06 ("run IO and remove device (user copy)") based on stress_01 and stress_03 - stress_07 ("run IO and kill ublk server (user copy)") based on stress_02 and stress_04 Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 8 +++++ tools/testing/selftests/ublk/test_generic_14.sh | 40 +++++++++++++++++++++++++ tools/testing/selftests/ublk/test_loop_06.sh | 25 ++++++++++++++++ tools/testing/selftests/ublk/test_loop_07.sh | 21 +++++++++++++ tools/testing/selftests/ublk/test_null_03.sh | 24 +++++++++++++++ tools/testing/selftests/ublk/test_stress_06.sh | 39 ++++++++++++++++++++++++ tools/testing/selftests/ublk/test_stress_07.sh | 39 ++++++++++++++++++++++++ tools/testing/selftests/ublk/test_stripe_05.sh | 26 ++++++++++++++++ tools/testing/selftests/ublk/test_stripe_06.sh | 21 +++++++++++++ 9 files changed, 243 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_generic_14.sh create mode 100755 tools/testing/selftests/ublk/test_loop_06.sh create mode 100755 tools/testing/selftests/ublk/test_loop_07.sh create mode 100755 tools/testing/selftests/ublk/test_null_03.sh create mode 100755 tools/testing/selftests/ublk/test_stress_06.sh create mode 100755 tools/testing/selftests/ublk/test_stress_07.sh create mode 100755 tools/testing/selftests/ublk/test_stripe_05.sh create mode 100755 tools/testing/selftests/ublk/test_stripe_06.sh diff --git 
a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 770269efe42a..837977b62417 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -21,24 +21,32 @@ TEST_PROGS += test_generic_10.sh TEST_PROGS += test_generic_11.sh TEST_PROGS += test_generic_12.sh TEST_PROGS += test_generic_13.sh +TEST_PROGS += test_generic_14.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh +TEST_PROGS += test_null_03.sh TEST_PROGS += test_loop_01.sh TEST_PROGS += test_loop_02.sh TEST_PROGS += test_loop_03.sh TEST_PROGS += test_loop_04.sh TEST_PROGS += test_loop_05.sh +TEST_PROGS += test_loop_06.sh +TEST_PROGS += test_loop_07.sh TEST_PROGS += test_stripe_01.sh TEST_PROGS += test_stripe_02.sh TEST_PROGS += test_stripe_03.sh TEST_PROGS += test_stripe_04.sh +TEST_PROGS += test_stripe_05.sh +TEST_PROGS += test_stripe_06.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh TEST_PROGS += test_stress_03.sh TEST_PROGS += test_stress_04.sh TEST_PROGS += test_stress_05.sh +TEST_PROGS += test_stress_06.sh +TEST_PROGS += test_stress_07.sh TEST_GEN_PROGS_EXTENDED = kublk diff --git a/tools/testing/selftests/ublk/test_generic_14.sh b/tools/testing/selftests/ublk/test_generic_14.sh new file mode 100755 index 000000000000..cd9b44b97c24 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_14.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_14" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover 256M "kill_daemon" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (user copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -u & +ublk_run_recover_test -t loop -q 2 -r 1 -u "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -u -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -u -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_06.sh b/tools/testing/selftests/ublk/test_loop_06.sh new file mode 100755 index 000000000000..1d1a8a725502 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_06.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_06" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify over user copy" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test "loop" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_07.sh b/tools/testing/selftests/ublk/test_loop_07.sh new file mode 100755 index 000000000000..493f3fb611a5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_07.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. 
"$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_07" +ERR_CODE=0 + +_prep_test "loop" "mkfs & mount & umount with user copy" + +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -u "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "loop" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_03.sh b/tools/testing/selftests/ublk/test_null_03.sh new file mode 100755 index 000000000000..0051067b4686 --- /dev/null +++ b/tools/testing/selftests/ublk/test_null_03.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="null_03" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test with user copy" + +dev_id=$(_add_ublk_dev -t null -u) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "null" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_06.sh b/tools/testing/selftests/ublk/test_stress_06.sh new file mode 100755 index 000000000000..37188ec2e1f7 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_06.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_06" +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device (user copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -u & +ublk_io_and_remove 256M -t loop -q 4 -u "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_io_and_remove 8G -t null -q 4 -u --nthreads 8 --per_io_tasks & +ublk_io_and_remove 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_07.sh b/tools/testing/selftests/ublk/test_stress_07.sh new file mode 100755 index 000000000000..fb061fc26d36 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_07.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_07" +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server (user copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -u --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -u --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -u "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_io_and_kill_daemon 8G -t null -q 4 -u --nthreads 8 --per_io_tasks & +ublk_io_and_kill_daemon 256M -t loop -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -u --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_05.sh b/tools/testing/selftests/ublk/test_stripe_05.sh new file mode 100755 index 000000000000..05d71951d710 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_05.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_05" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test on user copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -q 2 -u "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test "stripe" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_06.sh b/tools/testing/selftests/ublk/test_stripe_06.sh new file mode 100755 index 000000000000..d06cac7626e2 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_06.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. 
"$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_06" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on user copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -u -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" +_show_result $TID $ERR_CODE -- cgit v1.2.3 From 9869d3a6fed381f3b98404e26e1afc75d680cbf9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Dec 2025 22:35:00 +0800 Subject: block: fix race between wbt_enable_default and IO submission When wbt_enable_default() is moved out of queue freezing in elevator_change(), it can cause the wbt inflight counter to become negative (-1), leading to hung tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter is in an inconsistent state. The issue occurs because wbt_enable_default() could race with IO submission, allowing the counter to be decremented before proper initialization. This manifests as: rq_wait[0]: inflight: -1 has_waiters: True rwb_enabled() checks the state, which can be updated exactly between wbt_wait() (rq_qos_throttle()) and wbt_track()(rq_qos_track()), then the inflight counter will become negative. And results in hung task warnings like: task:kworker/u24:39 state:D stack:0 pid:14767 Call Trace: rq_qos_wait+0xb4/0x150 wbt_wait+0xa9/0x100 __rq_qos_throttle+0x24/0x40 blk_mq_submit_bio+0x672/0x7b0 ... Fix this by: 1. Splitting wbt_enable_default() into: - __wbt_enable_default(): Returns true if wbt_init() should be called - wbt_enable_default(): Wrapper for existing callers (no init) - wbt_init_enable_default(): New function that checks and inits WBT 2. Using wbt_init_enable_default() in blk_register_queue() to ensure proper initialization during queue registration 3. Move wbt_init() out of wbt_enable_default() which is only for enabling disabled wbt from bfq and iocost, and wbt_init() isn't needed. 
Then the original lock warning can be avoided. 4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling code since it's no longer needed This ensures WBT is properly initialized before any IO can be submitted, preventing the counter from going negative. Cc: Nilay Shroff Cc: Yu Kuai Cc: Guangwu Zhang Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()") Signed-off-by: Ming Lei Reviewed-by: Nilay Shroff Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- block/blk-sysfs.c | 2 +- block/blk-wbt.c | 20 ++++++++++++++++---- block/blk-wbt.h | 5 +++++ block/elevator.c | 4 ---- block/elevator.h | 1 - 6 files changed, 23 insertions(+), 11 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4a8d3d96bfe4..6e54b1d3d8bc 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7181,7 +7181,7 @@ static void bfq_exit_queue(struct elevator_queue *e) blk_stat_disable_accounting(bfqd->queue); blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue); - set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags); + wbt_enable_default(bfqd->queue->disk); kfree(bfqd); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8684c57498cc..e0a70d26972b 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -932,7 +932,7 @@ int blk_register_queue(struct gendisk *disk) elevator_set_default(q); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); - wbt_enable_default(disk); + wbt_init_enable_default(disk); /* Now everything is ready and send out KOBJ_ADD uevent */ kobject_uevent(&disk->queue_kobj, KOBJ_ADD); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index eb8037bae0bd..0974875f77bd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -699,7 +699,7 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq) /* * Enable wbt if defaults are configured that way */ -void wbt_enable_default(struct gendisk *disk) +static bool __wbt_enable_default(struct gendisk *disk) { struct request_queue *q = 
disk->queue; struct rq_qos *rqos; @@ -716,19 +716,31 @@ void wbt_enable_default(struct gendisk *disk) if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; mutex_unlock(&disk->rqos_state_mutex); - return; + return false; } mutex_unlock(&disk->rqos_state_mutex); /* Queue not registered? Maybe shutting down... */ if (!blk_queue_registered(q)) - return; + return false; if (queue_is_mq(q) && enable) - wbt_init(disk); + return true; + return false; +} + +void wbt_enable_default(struct gendisk *disk) +{ + __wbt_enable_default(disk); } EXPORT_SYMBOL_GPL(wbt_enable_default); +void wbt_init_enable_default(struct gendisk *disk) +{ + if (__wbt_enable_default(disk)) + WARN_ON_ONCE(wbt_init(disk)); +} + u64 wbt_default_latency_nsec(struct request_queue *q) { /* diff --git a/block/blk-wbt.h b/block/blk-wbt.h index e5fc653b9b76..925f22475738 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -5,6 +5,7 @@ #ifdef CONFIG_BLK_WBT int wbt_init(struct gendisk *disk); +void wbt_init_enable_default(struct gendisk *disk); void wbt_disable_default(struct gendisk *disk); void wbt_enable_default(struct gendisk *disk); @@ -16,6 +17,10 @@ u64 wbt_default_latency_nsec(struct request_queue *); #else +static inline void wbt_init_enable_default(struct gendisk *disk) +{ +} + static inline void wbt_disable_default(struct gendisk *disk) { } diff --git a/block/elevator.c b/block/elevator.c index 5b37ef44f52d..a2f8b2251dc6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -633,14 +633,10 @@ static int elevator_change_done(struct request_queue *q, .et = ctx->old->et, .data = ctx->old->elevator_data }; - bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, - &ctx->old->flags); elv_unregister_queue(q, ctx->old); blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set); kobject_put(&ctx->old->kobj); - if (enable_wbt) - wbt_enable_default(q->disk); } if (ctx->new) { ret = elv_register_queue(q, ctx->new, !ctx->no_uevent); diff --git 
a/block/elevator.h b/block/elevator.h index a9d092c5a9e8..3eb32516be0b 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -156,7 +156,6 @@ struct elevator_queue #define ELEVATOR_FLAG_REGISTERED 0 #define ELEVATOR_FLAG_DYING 1 -#define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT 2 /* * block elevator interface -- cgit v1.2.3 From 54891a96b7a90d77c32bd0a7d6c9987e5479a314 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 15 Dec 2025 23:21:04 +0800 Subject: loop: use READ_ONCE() to read lo->lo_state without locking When lo->lo_mutex is not held, direct access may read stale data. This patch uses READ_ONCE() to read lo->lo_state and data_race() to silence code checkers, and changes all assignments to use WRITE_ONCE(). Reviewed-by: Damien Le Moal Signed-off-by: Yongpeng Yang Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 272bc608e528..32a3a5b13802 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, /* Order wrt reading lo_state in loop_validate_file(). 
*/ wmb(); - lo->lo_state = Lo_bound; + WRITE_ONCE(lo->lo_state, Lo_bound); if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; @@ -1179,7 +1179,7 @@ static void __loop_clr_fd(struct loop_device *lo) if (!part_shift) set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); mutex_lock(&lo->lo_mutex); - lo->lo_state = Lo_unbound; + WRITE_ONCE(lo->lo_state, Lo_unbound); mutex_unlock(&lo->lo_mutex); /* @@ -1218,7 +1218,7 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_flags |= LO_FLAGS_AUTOCLEAR; if (disk_openers(lo->lo_disk) == 1) - lo->lo_state = Lo_rundown; + WRITE_ONCE(lo->lo_state, Lo_rundown); loop_global_unlock(lo, true); return 0; @@ -1743,7 +1743,7 @@ static void lo_release(struct gendisk *disk) mutex_lock(&lo->lo_mutex); if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) - lo->lo_state = Lo_rundown; + WRITE_ONCE(lo->lo_state, Lo_rundown); need_clear = (lo->lo_state == Lo_rundown); mutex_unlock(&lo->lo_mutex); @@ -1858,7 +1858,7 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); - if (lo->lo_state != Lo_bound) + if (data_race(READ_ONCE(lo->lo_state)) != Lo_bound) return BLK_STS_IOERR; switch (req_op(rq)) { @@ -2016,7 +2016,7 @@ static int loop_add(int i) lo->worker_tree = RB_ROOT; INIT_LIST_HEAD(&lo->idle_worker_list); timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE); - lo->lo_state = Lo_unbound; + WRITE_ONCE(lo->lo_state, Lo_unbound); err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -2174,7 +2174,7 @@ static int loop_control_remove(int idx) goto mark_visible; } /* Mark this loop device as no more bound, but not quite unbound yet */ - lo->lo_state = Lo_deleting; + WRITE_ONCE(lo->lo_state, Lo_deleting); mutex_unlock(&lo->lo_mutex); loop_remove(lo); @@ -2197,8 +2197,12 @@ static int loop_control_get_free(int idx) if (ret) return ret; idr_for_each_entry(&loop_index_idr, lo, id) { - /* Hitting a race results in creating a new loop 
device which is harmless. */ - if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound) + /* + * Hitting a race results in creating a new loop device + * which is harmless. + */ + if (lo->idr_visible && + data_race(READ_ONCE(lo->lo_state)) == Lo_unbound) goto found; } mutex_unlock(&loop_ctl_mutex); -- cgit v1.2.3 From 4b2b03151e2e3c11a3caae6e0223964dc771b003 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 15 Dec 2025 23:21:06 +0800 Subject: zloop: use READ_ONCE() to read lo->lo_state in queue_rq path In the queue_rq path, zlo->state is accessed without locking, and direct access may read stale data. This patch uses READ_ONCE() to read zlo->state and data_race() to silence code checkers, and changes all assignments to use WRITE_ONCE(). Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Yongpeng Yang Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 77bd6081b244..8e334f5025fc 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -697,7 +697,7 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx, struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq); struct zloop_device *zlo = rq->q->queuedata; - if (zlo->state == Zlo_deleting) + if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting) return BLK_STS_IOERR; /* @@ -1002,7 +1002,7 @@ static int zloop_ctl_add(struct zloop_options *opts) ret = -ENOMEM; goto out; } - zlo->state = Zlo_creating; + WRITE_ONCE(zlo->state, Zlo_creating); ret = mutex_lock_killable(&zloop_ctl_mutex); if (ret) @@ -1113,7 +1113,7 @@ static int zloop_ctl_add(struct zloop_options *opts) } mutex_lock(&zloop_ctl_mutex); - zlo->state = Zlo_live; + WRITE_ONCE(zlo->state, Zlo_live); mutex_unlock(&zloop_ctl_mutex); pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n", @@ -1177,7 +1177,7 @@ static int zloop_ctl_remove(struct zloop_options *opts) ret = -EINVAL; } else { 
idr_remove(&zloop_index_idr, zlo->id); - zlo->state = Zlo_deleting; + WRITE_ONCE(zlo->state, Zlo_deleting); } mutex_unlock(&zloop_ctl_mutex); -- cgit v1.2.3 From 67d85b062dcb49af9c903a58842a4ed7281f57b8 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 15 Dec 2025 17:58:17 +0800 Subject: Documentation: admin-guide: blockdev: replace zone_capacity with zone_capacity_mb when creating devices The "zone_capacity=%umb" option is no longer used. The effective option is now "zone_capacity_mb=%u", so update the documentation accordingly. Signed-off-by: Yongpeng Yang Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- Documentation/admin-guide/blockdev/zoned_loop.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst index 806adde664db..6aa865424ac3 100644 --- a/Documentation/admin-guide/blockdev/zoned_loop.rst +++ b/Documentation/admin-guide/blockdev/zoned_loop.rst @@ -134,7 +134,7 @@ MB and a zone capacity of 63 MB:: $ modprobe zloop $ mkdir -p /var/local/zloop/0 - $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity=63MB" > /dev/zloop-control + $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity_mb=63" > /dev/zloop-control For the device created (/dev/zloop0), the zone backing files are all created under the default base directory (/var/local/zloop):: -- cgit v1.2.3 From a58383fa45c706bda3bf4a1955c3a0327dbec7e7 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Wed, 17 Dec 2025 07:17:12 +0530 Subject: block: add allocation size check in blkdev_pr_read_keys() blkdev_pr_read_keys() takes num_keys from userspace and uses it to calculate the allocation size for keys_info via struct_size(). While there is a check for SIZE_MAX (integer overflow), there is no upper bound validation on the allocation size itself. 
A malicious or buggy userspace can pass a large num_keys value that doesn't trigger overflow but still results in an excessive allocation attempt, causing a warning in the page allocator when the order exceeds MAX_PAGE_ORDER. Fix this by introducing PR_KEYS_MAX to limit the number of keys to a sane value. This makes the SIZE_MAX check redundant, so remove it. Also switch to kvzalloc/kvfree to handle larger allocations gracefully. Fixes: 22a1ffea5f80 ("block: add IOC_PR_READ_KEYS ioctl") Tested-by: syzbot+660d079d90f8a1baf54d@syzkaller.appspotmail.com Reported-by: syzbot+660d079d90f8a1baf54d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=660d079d90f8a1baf54d Link: https://lore.kernel.org/all/20251212013510.3576091-1-kartikey406@gmail.com/T/ [v1] Signed-off-by: Deepanshu Kartikey Reviewed-by: Martin K. Petersen Reviewed-by: Stefan Hajnoczi Signed-off-by: Jens Axboe --- block/ioctl.c | 9 +++++---- include/uapi/linux/pr.h | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index 61feed686418..344478348a54 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -442,11 +442,12 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, if (copy_from_user(&read_keys, arg, sizeof(read_keys))) return -EFAULT; - keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); - if (keys_info_len == SIZE_MAX) + if (read_keys.num_keys > PR_KEYS_MAX) return -EINVAL; - keys_info = kzalloc(keys_info_len, GFP_KERNEL); + keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); + + keys_info = kvzalloc(keys_info_len, GFP_KERNEL); if (!keys_info) return -ENOMEM; @@ -473,7 +474,7 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, if (copy_to_user(arg, &read_keys, sizeof(read_keys))) ret = -EFAULT; out: - kfree(keys_info); + kvfree(keys_info); return ret; } diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index 847f3051057a..f0ecb1677317 100644 --- 
a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -79,4 +79,6 @@ struct pr_read_reservation { #define IOC_PR_READ_KEYS _IOWR('p', 206, struct pr_read_keys) #define IOC_PR_READ_RESERVATION _IOR('p', 207, struct pr_read_reservation) +#define PR_KEYS_MAX (1u << 16) + #endif /* _UAPI_PR_H */ -- cgit v1.2.3 From c258f5c4502c9667bccf5d76fa731ab9c96687c1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Dec 2025 22:34:15 +0800 Subject: ublk: fix deadlock when reading partition table When a process (such as udev) opens a ublk block device (e.g., to read the partition table via bdev_open()), a deadlock[1] can occur: 1. bdev_open() grabs disk->open_mutex 2. The process issues read I/O to the ublk backend to read the partition table 3. In __ublk_complete_rq(), blk_update_request() or blk_mq_end_request() runs bio->bi_end_io() callbacks 4. If this triggers fput() on the file descriptor of the ublk block device, the work may be deferred to the current task's task work (see fput() implementation) 5. This eventually calls blkdev_release() from the same context 6. blkdev_release() tries to grab disk->open_mutex again 7. Deadlock: same task waiting for a mutex it already holds The fix is to run blk_update_request() and blk_mq_end_request() with bottom halves disabled. This forces blkdev_release() to run in kernel work-queue context instead of the current task's task-work context, which allows the ublk server to make forward progress and avoids the deadlock.
Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Link: https://github.com/ublk-org/ublksrv/issues/170 [1] Signed-off-by: Ming Lei Reviewed-by: Caleb Sander Mateos [axboe: rewrite comment in ublk] Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index df9831783a13..cfd2132410dd 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1080,12 +1080,20 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu); } +static void ublk_end_request(struct request *req, blk_status_t error) +{ + local_bh_disable(); + blk_mq_end_request(req, error); + local_bh_enable(); +} + /* todo: handle partial completion */ static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io, bool need_map) { unsigned int unmapped_bytes; blk_status_t res = BLK_STS_OK; + bool requeue; /* failed read IO if nothing is read */ if (!io->res && req_op(req) == REQ_OP_READ) @@ -1117,14 +1125,30 @@ static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io, if (unlikely(unmapped_bytes < io->res)) io->res = unmapped_bytes; - if (blk_update_request(req, BLK_STS_OK, io->res)) + /* + * Run bio->bi_end_io() with softirqs disabled. If the final fput + * happens off this path, then that will prevent ublk's blkdev_release() + * from being called on current's task work, see fput() implementation. + * + * Otherwise, ublk server may not provide forward progress in case of + * reading the partition table from bdev_open() with disk->open_mutex + * held, and causes dead lock as we could already be holding + * disk->open_mutex here. + * + * Preferably we would not be doing IO with a mutex held that is also + * used for release, but this work-around will suffice for now. 
+ */ + local_bh_disable(); + requeue = blk_update_request(req, BLK_STS_OK, io->res); + local_bh_enable(); + if (requeue) blk_mq_requeue_request(req, true); else if (likely(!blk_should_fake_timeout(req->q))) __blk_mq_end_request(req, BLK_STS_OK); return; exit: - blk_mq_end_request(req, res); + ublk_end_request(req, res); } static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io, @@ -1164,7 +1188,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, if (ublk_nosrv_dev_should_queue_io(ubq->dev)) blk_mq_requeue_request(rq, false); else - blk_mq_end_request(rq, BLK_STS_IOERR); + ublk_end_request(rq, BLK_STS_IOERR); } static void @@ -1209,7 +1233,7 @@ __ublk_do_auto_buf_reg(const struct ublk_queue *ubq, struct request *req, ublk_auto_buf_reg_fallback(ubq, req->tag); return AUTO_BUF_REG_FALLBACK; } - blk_mq_end_request(req, BLK_STS_IOERR); + ublk_end_request(req, BLK_STS_IOERR); return AUTO_BUF_REG_FAIL; } -- cgit v1.2.3 From c9b5645fd8ca10f310e41b07540f98e6a9720f40 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Wed, 17 Dec 2025 10:36:48 +0100 Subject: block: rnbd-clt: Fix leaked ID in init_dev() If kstrdup() fails in init_dev(), then the newly allocated ID is lost. 
Fixes: 64e8a6ece1a5 ("block/rnbd-clt: Dynamically alloc buffer for pathname & blk_symlink_name") Signed-off-by: Thomas Fourier Acked-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index f1409e54010a..d1c354636315 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1423,9 +1423,11 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, goto out_alloc; } - ret = ida_alloc_max(&index_ida, (1 << (MINORBITS - RNBD_PART_BITS)) - 1, - GFP_KERNEL); - if (ret < 0) { + dev->clt_device_id = ida_alloc_max(&index_ida, + (1 << (MINORBITS - RNBD_PART_BITS)) - 1, + GFP_KERNEL); + if (dev->clt_device_id < 0) { + ret = dev->clt_device_id; pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n", pathname, sess->sessname, ret); goto out_queues; @@ -1434,10 +1436,9 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, dev->pathname = kstrdup(pathname, GFP_KERNEL); if (!dev->pathname) { ret = -ENOMEM; - goto out_queues; + goto out_ida; } - dev->clt_device_id = ret; dev->sess = sess; dev->access_mode = access_mode; dev->nr_poll_queues = nr_poll_queues; @@ -1453,6 +1454,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, return dev; +out_ida: + ida_free(&index_ida, dev->clt_device_id); out_queues: kfree(dev->hw_queues); out_alloc: -- cgit v1.2.3 From ccb8a3c08adf8121e2afb8e704f007ce99324d79 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 16 Dec 2025 22:34:35 -0700 Subject: block: validate pi_offset integrity limit The PI tuple must be contained within the metadata value, so validate that pi_offset + pi_tuple_size <= metadata_size. This guards against block drivers that report invalid pi_offset values. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-settings.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 51401f08ce05..d138abc973bb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -161,10 +161,9 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) return -EINVAL; } - if (bi->pi_tuple_size > bi->metadata_size) { - pr_warn("pi_tuple_size (%u) exceeds metadata_size (%u)\n", - bi->pi_tuple_size, - bi->metadata_size); + if (bi->pi_offset + bi->pi_tuple_size > bi->metadata_size) { + pr_warn("pi_offset (%u) + pi_tuple_size (%u) exceeds metadata_size (%u)\n", + bi->pi_offset, bi->pi_tuple_size, bi->metadata_size); return -EINVAL; } -- cgit v1.2.3 From af65faf34f6e9919bdd2912770d25d2a73cbcc7c Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 16 Dec 2025 22:34:36 -0700 Subject: block: validate interval_exp integrity limit Various code assumes that the integrity interval is at least 1 sector and evenly divides the logical block size. Add these checks to blk_validate_integrity_limits(). This guards against block drivers that report invalid interval_exp values. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-settings.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index d138abc973bb..a9e65dc090da 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -193,8 +193,13 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) break; } - if (!bi->interval_exp) + if (!bi->interval_exp) { bi->interval_exp = ilog2(lim->logical_block_size); + } else if (bi->interval_exp < SECTOR_SHIFT || + bi->interval_exp > ilog2(lim->logical_block_size)) { + pr_warn("invalid interval_exp %u\n", bi->interval_exp); + return -EINVAL; + } /* * The PI generation / validation helpers do not expect intervals to -- cgit v1.2.3