nvme fixes for Linux 7.0

  - Improve quirk visibility and configurability (Maurizio)
  - Fix runtime user modification to queue setup (Keith)
  - Fix multipath leak on try_module_get failure (Keith)
  - Ignore ambiguous spec definitions for better atomics support (John)
  - Fix admin queue leak on controller reset (Ming)
  - Fix large allocation in persistent reservation read keys (Sungwoo Kim)
  - Fix fcloop callback handling (Justin)
  - Securely free DHCHAP secrets (Daniel)
  - Various cleanups and typo fixes (John, Wilfred)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE3Fbyvv+648XNRdHTPe3zGtjzRgkFAmmoSbMACgkQPe3zGtjz
 RgkpuQ/9EfCp24xowwKEXycX7pquojwjEAh1n5WsUyBDXQls/7Dq3w0EXtkc8fA8
 SUcDpTj7ABiF/faschCoFO47R5/0TPtNMCleWFSdW0OG6B7IYaUt9Cj86JK1dzme
 Zn7luH47Pesmd+H184IOIfDhsiVs5Z3YCISlT1aa1EFg+3/neDqGGpT4+ySOjSZe
 9j8ASUTOqfuBZ2Xc8RNvumABBEkEkUd4xwYTLRi+o/PR9econGrpiEqDyUBAf8dr
 VrZoL0aoQoUEaU08tJOci4GH3Spp4RXlpQo92RBE4yDTxWozRRBWwoCycmPKHQ5b
 +5nC77t1p2OyzgP0xPngQZVMi7A+QTFZf4shq0Xho5kifjB8ZTqVSJJSGK7RlwE4
 GmXgHfMs8Gvn3aew8BcpXilhe4InXfY1LqYmTvJxo9VLK/u7apo94vrJICewHh2z
 lsiWTOHe9xSm8wR20fcxp3D3kXpQ5sMcMoco96dVFetw1WNE30qDy+xtpOvPwdL5
 9mloguR7Pmsu+gVim2VaqSA8HsPIYEbXymLMVzTeVbtPALzrKsGLLW8k/DYFhSTm
 +Ow4KeItyL5hgDU2jenjS3xwshKqKTeJDueue4WBFxgqdbH9hwiJ6aVWS2eoJxev
 RAZXSGTmxEo8X5nDsNz048iT96lFpM7ERViHOWnrptLcFX4yFNM=
 =fMd5
 -----END PGP SIGNATURE-----

Merge tag 'nvme-7.0-2026-03-04' of git://git.infradead.org/nvme into block-7.0

Pull NVMe fixes from Keith:

"- Improve quirk visibility and configurability (Maurizio)
 - Fix runtime user modification to queue setup (Keith)
 - Fix multipath leak on try_module_get failure (Keith)
 - Ignore ambiguous spec definitions for better atomics support (John)
 - Fix admin queue leak on controller reset (Ming)
 - Fix large allocation in persistent reservation read keys (Sungwoo Kim)
 - Fix fcloop callback handling (Justin)
 - Securely free DHCHAP secrets (Daniel)
 - Various cleanups and typo fixes (John, Wilfred)"

* tag 'nvme-7.0-2026-03-04' of git://git.infradead.org/nvme:
  nvme: fix memory allocation in nvme_pr_read_keys()
  nvme-multipath: fix leak on try_module_get failure
  nvmet-fcloop: Check remoteport port_state before calling done callback
  nvme-pci: do not try to add queue maps at runtime
  nvme-pci: cap queue creation to used queues
  nvme-pci: ensure we're polling a polled queue
  nvme: fix memory leak in quirks_param_set()
  nvme: correct comment about nvme_ns_remove()
  nvme: stop setting namespace gendisk device driver data
  nvme: add support for dynamic quirk configuration via module parameter
  nvme: fix admin queue leak on controller reset
  nvme-fabrics: use kfree_sensitive() for DHCHAP secrets
  nvme: stop using AWUPF
  nvme: expose active quirks in sysfs
  nvme/host: fixup some typos
This commit is contained in:
Jens Axboe 2026-03-04 08:15:17 -07:00
commit d90c470b0e
10 changed files with 312 additions and 37 deletions

View File

@ -74,6 +74,7 @@
TPM TPM drivers are enabled.
UMS USB Mass Storage support is enabled.
USB USB support is enabled.
NVME NVMe support is enabled.
USBHID USB Human Interface Device support is enabled.
V4L Video For Linux support is enabled.
VGA The VGA console has been enabled.
@ -4787,6 +4788,18 @@ Kernel parameters
This can be set from sysctl after boot.
See Documentation/admin-guide/sysctl/vm.rst for details.
nvme.quirks= [NVME] A list of quirk entries to augment the built-in
nvme quirk list. List entries are separated by a
'-' character.
Each entry has the form VendorID:ProductID:quirk_names.
The IDs are 4-digit hex numbers and quirk_names is a
list of quirk names separated by commas. A quirk name
can be prefixed by '^', meaning that the specified
quirk must be disabled.
Example:
nvme.quirks=7710:2267:bogus_nid,^identify_cns-9900:7711:broken_msi
ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver.
See Documentation/core-api/debugging-via-ohci1394.rst for more
info.

View File

@ -2046,14 +2046,10 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
} else {
/*
* Use the controller wide atomic write unit. This sucks
* because the limit is defined in terms of logical blocks while
* namespaces can have different formats, and because there is
* no clear language in the specification prohibiting different
* values for different controllers in the subsystem.
*/
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
if (ns->ctrl->awupf)
dev_info_once(ns->ctrl->device,
"AWUPF ignored, only NAWUPF accepted\n");
atomic_bs = bs;
}
lim->atomic_write_hw_max = atomic_bs;
@ -3222,7 +3218,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->model, id->mn, sizeof(subsys->model));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
subsys->awupf = le16_to_cpu(id->awupf);
/* Versions prior to 1.4 don't necessarily report a valid type */
if (id->cntrltype == NVME_CTRL_DISC ||
@ -3655,6 +3650,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
ctrl->awupf = le16_to_cpu(id->awupf);
out_free:
kfree(id);
return ret;
@ -4186,13 +4182,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
nvme_mpath_add_disk(ns, info->anagrpid);
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
/*
* Set ns->disk->device->driver_data to ns so we can access
* ns->head->passthru_err_log_enabled in
* nvme_io_passthru_err_log_enabled_[store | show]().
*/
dev_set_drvdata(disk_to_dev(ns->disk), ns);
return;
out_cleanup_ns_from_list:
@ -4865,6 +4854,13 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ret)
return ret;
/*
* If a previous admin queue exists (e.g., from before a reset),
* put it now before allocating a new one to avoid orphaning it.
*/
if (ctrl->admin_q)
blk_put_queue(ctrl->admin_q);
ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->admin_q)) {
ret = PTR_ERR(ctrl->admin_q);

View File

@ -1290,8 +1290,8 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
kfree(opts->subsysnqn);
kfree(opts->host_traddr);
kfree(opts->host_iface);
kfree(opts->dhchap_secret);
kfree(opts->dhchap_ctrl_secret);
kfree_sensitive(opts->dhchap_secret);
kfree_sensitive(opts->dhchap_ctrl_secret);
kfree(opts);
}
EXPORT_SYMBOL_GPL(nvmf_free_options);

View File

@ -1300,7 +1300,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
mutex_lock(&head->subsys->lock);
/*
* We are called when all paths have been removed, and at that point
* head->list is expected to be empty. However, nvme_remove_ns() and
* head->list is expected to be empty. However, nvme_ns_remove() and
* nvme_init_ns_head() can run concurrently and so if head->delayed_
* removal_secs is configured, it is possible that by the time we reach
* this point, head->list may no longer be empty. Therefore, we recheck
@ -1310,13 +1310,11 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
if (!list_empty(&head->list))
goto out;
if (head->delayed_removal_secs) {
/*
* Ensure that no one could remove this module while the head
* remove work is pending.
*/
if (!try_module_get(THIS_MODULE))
goto out;
/*
* Ensure that no one could remove this module while the head
* remove work is pending.
*/
if (head->delayed_removal_secs && try_module_get(THIS_MODULE)) {
mod_delayed_work(nvme_wq, &head->remove_work,
head->delayed_removal_secs * HZ);
} else {

View File

@ -180,6 +180,60 @@ enum nvme_quirks {
NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22),
};
static inline char *nvme_quirk_name(enum nvme_quirks q)
{
switch (q) {
case NVME_QUIRK_STRIPE_SIZE:
return "stripe_size";
case NVME_QUIRK_IDENTIFY_CNS:
return "identify_cns";
case NVME_QUIRK_DEALLOCATE_ZEROES:
return "deallocate_zeroes";
case NVME_QUIRK_DELAY_BEFORE_CHK_RDY:
return "delay_before_chk_rdy";
case NVME_QUIRK_NO_APST:
return "no_apst";
case NVME_QUIRK_NO_DEEPEST_PS:
return "no_deepest_ps";
case NVME_QUIRK_QDEPTH_ONE:
return "qdepth_one";
case NVME_QUIRK_MEDIUM_PRIO_SQ:
return "medium_prio_sq";
case NVME_QUIRK_IGNORE_DEV_SUBNQN:
return "ignore_dev_subnqn";
case NVME_QUIRK_DISABLE_WRITE_ZEROES:
return "disable_write_zeroes";
case NVME_QUIRK_SIMPLE_SUSPEND:
return "simple_suspend";
case NVME_QUIRK_SINGLE_VECTOR:
return "single_vector";
case NVME_QUIRK_128_BYTES_SQES:
return "128_bytes_sqes";
case NVME_QUIRK_SHARED_TAGS:
return "shared_tags";
case NVME_QUIRK_NO_TEMP_THRESH_CHANGE:
return "no_temp_thresh_change";
case NVME_QUIRK_NO_NS_DESC_LIST:
return "no_ns_desc_list";
case NVME_QUIRK_DMA_ADDRESS_BITS_48:
return "dma_address_bits_48";
case NVME_QUIRK_SKIP_CID_GEN:
return "skip_cid_gen";
case NVME_QUIRK_BOGUS_NID:
return "bogus_nid";
case NVME_QUIRK_NO_SECONDARY_TEMP_THRESH:
return "no_secondary_temp_thresh";
case NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND:
return "force_no_simple_suspend";
case NVME_QUIRK_BROKEN_MSI:
return "broken_msi";
case NVME_QUIRK_DMAPOOL_ALIGN_512:
return "dmapool_align_512";
}
return "unknown";
}
/*
* Common request structure for NVMe passthrough. All drivers must have
* this structure as the first member of their request-private data.
@ -410,6 +464,8 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
u16 awupf; /* 0's based value. */
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@ -442,7 +498,6 @@ struct nvme_subsystem {
u8 cmic;
enum nvme_subsys_type subtype;
u16 vendor_id;
u16 awupf; /* 0's based value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;

View File

@ -72,6 +72,13 @@
static_assert(MAX_PRP_RANGE / NVME_CTRL_PAGE_SIZE <=
(1 /* prp1 */ + NVME_MAX_NR_DESCRIPTORS * PRPS_PER_PAGE));
/*
 * One user-supplied quirk override from the nvme.quirks module parameter,
 * keyed by PCI vendor/device ID. Bits set in enabled_quirks are forced on
 * for matching devices; bits set in disabled_quirks are forced off.
 */
struct quirk_entry {
	u16 vendor_id;
	u16 dev_id;
	u32 enabled_quirks;	/* quirk bits to force on */
	u32 disabled_quirks;	/* quirk bits to force off */
};
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0444);
@ -102,6 +109,143 @@ static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
static struct quirk_entry *nvme_pci_quirk_list;
static unsigned int nvme_pci_quirk_count;
/* Helper to parse individual quirk names */
/*
 * Parse a comma-separated list of quirk names into @entry's masks. A name
 * prefixed with '^' requests that the quirk be disabled; otherwise it is
 * enabled. Returns 0 on success or -EINVAL on an unrecognized name.
 */
static int nvme_parse_quirk_names(char *quirk_str, struct quirk_entry *entry)
{
	int i;
	bool disabled, found;
	char *p = quirk_str, *field;

	while ((field = strsep(&p, ",")) && *field) {
		disabled = false;
		found = false;

		/* A leading '^' means "force this quirk off". */
		if (*field == '^') {
			disabled = true;
			field++;
		}

		for (i = 0; i < 32; i++) {
			unsigned int bit = 1U << i;
			char *q_name = nvme_quirk_name(bit);

			/*
			 * Quirk bits are allocated contiguously from bit 0,
			 * so the first unnamed bit ends the search.
			 */
			if (!strcmp(q_name, "unknown"))
				break;
			/* strcmp() == 0 already implies equal lengths. */
			if (!strcmp(q_name, field)) {
				if (disabled)
					entry->disabled_quirks |= bit;
				else
					entry->enabled_quirks |= bit;
				found = true;
				break;
			}
		}
		if (!found) {
			pr_err("nvme: unrecognized quirk %s\n", field);
			return -EINVAL;
		}
	}
	return 0;
}
/* Helper to parse a single VID:DID:quirk_names entry */
/*
 * Parse one "VID:DID:quirk_names" entry into @entry. The IDs are hex
 * strings; quirk_names is handed on to nvme_parse_quirk_names().
 * Returns 0 on success or -EINVAL on any malformed or missing field.
 */
static int nvme_parse_quirk_entry(char *s, struct quirk_entry *entry)
{
	char *vid, *did, *names;

	/* strsep() on an exhausted string safely yields NULL. */
	vid = strsep(&s, ":");
	if (!vid || kstrtou16(vid, 16, &entry->vendor_id))
		return -EINVAL;

	did = strsep(&s, ":");
	if (!did || kstrtou16(did, 16, &entry->dev_id))
		return -EINVAL;

	names = strsep(&s, ":");
	if (!names)
		return -EINVAL;

	return nvme_parse_quirk_names(names, entry);
}
static int quirks_param_set(const char *value, const struct kernel_param *kp)
{
int count, err, i;
struct quirk_entry *qlist;
char *field, *val, *sep_ptr;
err = param_set_copystring(value, kp);
if (err)
return err;
val = kstrdup(value, GFP_KERNEL);
if (!val)
return -ENOMEM;
if (!*val)
goto out_free_val;
count = 1;
for (i = 0; val[i]; i++) {
if (val[i] == '-')
count++;
}
qlist = kcalloc(count, sizeof(*qlist), GFP_KERNEL);
if (!qlist) {
err = -ENOMEM;
goto out_free_val;
}
i = 0;
sep_ptr = val;
while ((field = strsep(&sep_ptr, "-"))) {
if (nvme_parse_quirk_entry(field, &qlist[i])) {
pr_err("nvme: failed to parse quirk string %s\n",
value);
goto out_free_qlist;
}
i++;
}
kfree(nvme_pci_quirk_list);
nvme_pci_quirk_count = count;
nvme_pci_quirk_list = qlist;
goto out_free_val;
out_free_qlist:
kfree(qlist);
out_free_val:
kfree(val);
return err;
}
static char quirks_param[128];
static const struct kernel_param_ops quirks_param_ops = {
.set = quirks_param_set,
.get = param_get_string,
};
static struct kparam_string quirks_param_string = {
.maxlen = sizeof(quirks_param),
.string = quirks_param,
};
module_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0444);
MODULE_PARM_DESC(quirks, "Enable/disable NVMe quirks by specifying "
"quirks=VID:DID:quirk_names");
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
unsigned int n;
@ -1496,7 +1640,8 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
struct nvme_queue *nvmeq = hctx->driver_data;
bool found;
if (!nvme_cqe_pending(nvmeq))
if (!test_bit(NVMEQ_POLLED, &nvmeq->flags) ||
!nvme_cqe_pending(nvmeq))
return 0;
spin_lock(&nvmeq->cq_poll_lock);
@ -2774,7 +2919,25 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues;
nr_io_queues = dev->nr_allocated_queues - 1;
if (dev->ctrl.tagset) {
/*
* The set's maps are allocated only once at initialization
* time. We can't add special queues later if their mq_map
* wasn't preallocated.
*/
if (dev->ctrl.tagset->nr_maps < 3)
dev->nr_poll_queues = 0;
if (dev->ctrl.tagset->nr_maps < 2)
dev->nr_write_queues = 0;
}
/*
* The initial number of allocated queue slots may be too large if the
* user reduced the special queue parameters. Cap the value to the
* number we need for this round.
*/
nr_io_queues = min(nvme_max_io_queues(dev),
dev->nr_allocated_queues - 1);
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
@ -3458,12 +3621,25 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0;
}
/*
 * Look up a user-supplied (nvme.quirks module parameter) quirk entry
 * matching this PCI device. Returns NULL if none was configured for it.
 */
static struct quirk_entry *detect_dynamic_quirks(struct pci_dev *pdev)
{
	/* Unsigned to match nvme_pci_quirk_count (avoids sign-compare). */
	unsigned int i;

	for (i = 0; i < nvme_pci_quirk_count; i++) {
		if (pdev->vendor == nvme_pci_quirk_list[i].vendor_id &&
		    pdev->device == nvme_pci_quirk_list[i].dev_id)
			return &nvme_pci_quirk_list[i];
	}
	return NULL;
}
static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
const struct pci_device_id *id)
{
unsigned long quirks = id->driver_data;
int node = dev_to_node(&pdev->dev);
struct nvme_dev *dev;
struct quirk_entry *qentry;
int ret = -ENOMEM;
dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids),
@ -3495,6 +3671,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
"platform quirk: setting simple suspend\n");
quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
}
qentry = detect_dynamic_quirks(pdev);
if (qentry) {
quirks |= qentry->enabled_quirks;
quirks &= ~qentry->disabled_quirks;
}
ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
quirks);
if (ret)
@ -4095,6 +4276,7 @@ static int __init nvme_init(void)
static void __exit nvme_exit(void)
{
kfree(nvme_pci_quirk_list);
pci_unregister_driver(&nvme_driver);
flush_workqueue(nvme_wq);
}

View File

@ -242,7 +242,7 @@ static int nvme_pr_read_keys(struct block_device *bdev,
if (rse_len > U32_MAX)
return -EINVAL;
rse = kzalloc(rse_len, GFP_KERNEL);
rse = kvzalloc(rse_len, GFP_KERNEL);
if (!rse)
return -ENOMEM;
@ -267,7 +267,7 @@ static int nvme_pr_read_keys(struct block_device *bdev,
}
free_rse:
kfree(rse);
kvfree(rse);
return ret;
}

View File

@ -601,6 +601,28 @@ static ssize_t dctype_show(struct device *dev,
}
static DEVICE_ATTR_RO(dctype);
/*
 * sysfs show handler for the controller "quirks" attribute: prints one
 * active quirk name per line, lowest bit first, or "none" when no quirk
 * is set for this controller.
 */
static ssize_t quirks_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
	unsigned long remaining = ctrl->quirks;
	int len = 0;
	int bit = 0;

	if (!remaining)
		return sysfs_emit(buf, "none\n");

	while (remaining) {
		if (remaining & 1)
			len += sysfs_emit_at(buf, len, "%s\n",
					     nvme_quirk_name(BIT(bit)));
		remaining >>= 1;
		bit++;
	}
	return len;
}
static DEVICE_ATTR_RO(quirks);
#ifdef CONFIG_NVME_HOST_AUTH
static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
struct device_attribute *attr, char *buf)
@ -742,6 +764,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_kato.attr,
&dev_attr_cntrltype.attr,
&dev_attr_dctype.attr,
&dev_attr_quirks.attr,
#ifdef CONFIG_NVME_HOST_AUTH
&dev_attr_dhchap_secret.attr,
&dev_attr_dhchap_ctrl_secret.attr,

View File

@ -25,7 +25,8 @@
struct nvme_tcp_queue;
/* Define the socket priority to use for connections were it is desirable
/*
* Define the socket priority to use for connections where it is desirable
* that the NIC consider performing optimized packet processing or filtering.
* A non-zero value being sufficient to indicate general consideration of any
* possible optimization. Making it a module param allows for alternative
@ -926,7 +927,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
req->curr_bio = req->curr_bio->bi_next;
/*
* If we don`t have any bios it means that controller
* If we don't have any bios it means the controller
* sent more data than we requested, hence error
*/
if (!req->curr_bio) {

View File

@ -491,6 +491,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
struct fcloop_rport *rport = remoteport->private;
struct nvmet_fc_target_port *targetport = rport->targetport;
struct fcloop_tport *tport;
int ret = 0;
if (!targetport) {
/*
@ -500,12 +501,18 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
* We end up here from delete association exchange:
* nvmet_fc_xmt_disconnect_assoc sends an async request.
*
* Return success because this is what LLDDs do; silently
* drop the response.
* Return success when remoteport is still online because this
* is what LLDDs do and silently drop the response. Otherwise,
* return with error to signal upper layer to perform the lsrsp
* resource cleanup.
*/
lsrsp->done(lsrsp);
if (remoteport->port_state == FC_OBJSTATE_ONLINE)
lsrsp->done(lsrsp);
else
ret = -ENODEV;
kmem_cache_free(lsreq_cache, tls_req);
return 0;
return ret;
}
memcpy(lsreq->rspaddr, lsrsp->rspbuf,