Merge tag 'nvme-6.9-2024-03-21' of git://git.infradead.org/nvme into block-6.9
author		Jens Axboe <axboe@kernel.dk>
		Thu, 21 Mar 2024 19:23:07 +0000 (13:23 -0600)
committer	Jens Axboe <axboe@kernel.dk>
		Thu, 21 Mar 2024 19:23:07 +0000 (13:23 -0600)
Pull NVMe fixes from Keith:

"nvme updates for Linux 6.9

 - Make an informative message less ominous (Keith)
 - Enhanced trace decoding (Guixin)
 - TCP updates (Hannes, Li)
 - Fabrics connect deadlock fix (Chunguang)
 - Platform API migration update (Uwe)
 - A new device quirk (Jiawei)"

* tag 'nvme-6.9-2024-03-21' of git://git.infradead.org/nvme:
  nvmet-rdma: remove NVMET_RDMA_REQ_INVALIDATE_RKEY flag
  nvme: remove redundant BUILD_BUG_ON check
  nvme/tcp: Add wq_unbound modparam for nvme_tcp_wq
  nvme-tcp: Export the nvme_tcp_wq to sysfs
  drivers/nvme: Add quirks for device 126f:2262
  nvme: parse format command's lbafu when tracing
  nvme: add tracing of reservation commands
  nvme: parse zns command's zsa and zrasf to string
  nvme: use nvme_disk_is_ns_head helper
  nvme: fix reconnection fail due to reserved tag allocation
  nvmet: add tracing of zns commands
  nvmet: add tracing of authentication commands
  nvme-apple: Convert to platform remove callback returning void
  nvmet-tcp: do not continue for invalid icreq
  nvme: change shutdown timeout setting message

drivers/nvme/host/core.c
drivers/nvme/host/sysfs.c
drivers/nvme/host/tcp.c
drivers/nvme/target/tcp.c

diff --combined drivers/nvme/host/core.c
index 00864a63447099bca59fa45f8f6076933b58f836,2120059337829ade03d49232b45e43b4a8345433..943d72bdd794ca5e6258cb02841447ca38898251
@@@ -722,7 -722,7 +722,7 @@@ void nvme_init_request(struct request *
        if (req->q->queuedata) {
                struct nvme_ns *ns = req->q->disk->private_data;
  
 -              logging_enabled = ns->passthru_err_log_enabled;
 +              logging_enabled = ns->head->passthru_err_log_enabled;
                req->timeout = NVME_IO_TIMEOUT;
        } else { /* no queuedata implies admin queue */
                logging_enabled = nr->ctrl->passthru_err_log_enabled;
@@@ -1162,10 -1162,6 +1162,10 @@@ u32 nvme_command_effects(struct nvme_ct
                effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
        } else {
                effects = le32_to_cpu(ctrl->effects->acs[opcode]);
 +
 +              /* Ignore execution restrictions if any relaxation bits are set */
 +              if (effects & NVME_CMD_EFFECTS_CSER_MASK)
 +                      effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
        }
  
        return effects;
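
For reference, the relaxation added above can be exercised in isolation: if any of the Command Submission and Execution Relaxation (CSER) bits are set, the CSE execution restrictions are ignored. A minimal userspace sketch, assuming the GENMASK field layout from include/linux/nvme.h (CSE in bits 18:16, CSER in bits 15:14); nvme_relax_effects is a hypothetical name, not a driver function:

#include <stdint.h>
#include <stdio.h>

/* Field layout as assumed from include/linux/nvme.h:
 * CSE (Command Submission and Execution) in bits 18:16,
 * CSER (the relaxation bits) in bits 15:14.
 */
#define GENMASK(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))
#define NVME_CMD_EFFECTS_CSER_MASK	GENMASK(15, 14)
#define NVME_CMD_EFFECTS_CSE_MASK	GENMASK(18, 16)

static uint32_t nvme_relax_effects(uint32_t effects)
{
	/* Ignore execution restrictions if any relaxation bits are set */
	if (effects & NVME_CMD_EFFECTS_CSER_MASK)
		effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
	return effects;
}

int main(void)
{
	uint32_t effects = (0x1u << 16) | (0x1u << 14); /* CSE set, one CSER bit set */

	/* prints "0x14000 -> 0x4000": the CSE field is cleared */
	printf("0x%x -> 0x%x\n", effects, nvme_relax_effects(effects));
	return 0;
}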
@@@ -1807,9 -1803,6 +1807,6 @@@ static void nvme_config_discard(struct 
  {
        struct nvme_ctrl *ctrl = ns->ctrl;
  
-       BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
-                       NVME_DSM_MAX_RANGES);
- 
        if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
                lim->max_hw_discard_sectors =
                        nvme_lba_to_sect(ns->head, ctrl->dmrsl);
@@@ -3237,7 -3230,7 +3234,7 @@@ static int nvme_init_identify(struct nv
  
                if (ctrl->shutdown_timeout != shutdown_timeout)
                        dev_info(ctrl->device,
-                                "Shutdown timeout set to %u seconds\n",
+                                "D3 entry latency set to %u seconds\n",
                                 ctrl->shutdown_timeout);
        } else
                ctrl->shutdown_timeout = shutdown_timeout;
@@@ -3731,6 -3724,7 +3728,6 @@@ static void nvme_alloc_ns(struct nvme_c
  
        ns->disk = disk;
        ns->queue = disk->queue;
 -      ns->passthru_err_log_enabled = false;
  
        if (ctrl->opts && ctrl->opts->data_digest)
                blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
  
        /*
         * Set ns->disk->device->driver_data to ns so we can access
 -       * ns->logging_enabled in nvme_passthru_err_log_enabled_store() and
 -       * nvme_passthru_err_log_enabled_show().
 +       * ns->head->passthru_err_log_enabled in
 +       * nvme_io_passthru_err_log_enabled_[store | show]().
         */
        dev_set_drvdata(disk_to_dev(ns->disk), ns);
  
@@@ -4225,7 -4219,6 +4222,7 @@@ static bool nvme_ctrl_pp_status(struct 
  static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
  {
        struct nvme_fw_slot_info_log *log;
 +      u8 next_fw_slot, cur_fw_slot;
  
        log = kmalloc(sizeof(*log), GFP_KERNEL);
        if (!log)
                return;
  
        if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM,
                        log, sizeof(*log), 0)) {
                dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
                goto out_free_log;
        }
  
 -      if (log->afi & 0x70 || !(log->afi & 0x7)) {
 +      cur_fw_slot = log->afi & 0x7;
 +      next_fw_slot = (log->afi & 0x70) >> 4;
 +      if (!cur_fw_slot || (next_fw_slot && (cur_fw_slot != next_fw_slot))) {
                dev_info(ctrl->device,
                         "Firmware is activated after next Controller Level Reset\n");
                goto out_free_log;
        }
  
 -      memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1],
 +      memcpy(ctrl->subsys->firmware_rev, &log->frs[cur_fw_slot - 1],
                sizeof(ctrl->subsys->firmware_rev));
  
  out_free_log:
@@@ -4391,7 -4382,8 +4388,8 @@@ int nvme_alloc_admin_tag_set(struct nvm
        set->ops = ops;
        set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
        if (ctrl->ops->flags & NVME_F_FABRICS)
-               set->reserved_tags = NVMF_RESERVED_TAGS;
+               /* Reserved for fabric connect and keep alive */
+               set->reserved_tags = 2;
        set->numa_node = ctrl->numa_node;
        set->flags = BLK_MQ_F_NO_SCHED;
        if (ctrl->ops->flags & NVME_F_BLOCKING)
@@@ -4460,7 -4452,8 +4458,8 @@@ int nvme_alloc_io_tag_set(struct nvme_c
        if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
                set->reserved_tags = NVME_AQ_DEPTH;
        else if (ctrl->ops->flags & NVME_F_FABRICS)
-               set->reserved_tags = NVMF_RESERVED_TAGS;
+               /* Reserved for fabric connect */
+               set->reserved_tags = 1;
        set->numa_node = ctrl->numa_node;
        set->flags = BLK_MQ_F_SHOULD_MERGE;
        if (ctrl->ops->flags & NVME_F_BLOCKING)
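
The firmware-slot hunk above decodes the AFI byte of the Firmware Slot Information log page: bits 2:0 hold the currently active slot, bits 6:4 the slot pending activation after the next Controller Level Reset. A standalone sketch of that decode (decode_afi is an illustrative name, not a driver function):

#include <stdint.h>
#include <stdio.h>

/* Decode the AFI byte as nvme_get_fw_slot_info() now does:
 * bits 2:0 = currently active firmware slot,
 * bits 6:4 = slot that becomes active after the next reset.
 */
static void decode_afi(uint8_t afi)
{
	uint8_t cur_fw_slot = afi & 0x7;
	uint8_t next_fw_slot = (afi & 0x70) >> 4;

	if (!cur_fw_slot || (next_fw_slot && cur_fw_slot != next_fw_slot))
		printf("firmware activates after next reset (cur %u, next %u)\n",
		       cur_fw_slot, next_fw_slot);
	else
		printf("running from slot %u, cached revision is current\n",
		       cur_fw_slot);
}

int main(void)
{
	decode_afi(0x01);	/* slot 1 active, nothing pending */
	decode_afi(0x21);	/* slot 2 pending: don't trust the cached revision */
	return 0;
}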
index 09fcaa519e5bc26618eae900a0830a89e6aebbb5,243ebc4d471a8bc366b73d98af9418e6c1082392..3c55f7edd181939fc8a37b47e0ce56fdb271e434
@@@ -48,8 -48,8 +48,8 @@@ static ssize_t nvme_adm_passthru_err_lo
                struct device_attribute *attr, const char *buf, size_t count)
  {
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 -      int err;
        bool passthru_err_log_enabled;
 +      int err;
  
        err = kstrtobool(buf, &passthru_err_log_enabled);
        if (err)
                return -EINVAL;
  
        ctrl->passthru_err_log_enabled = passthru_err_log_enabled;
  
        return count;
  }
  
 +static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 +{
 +      struct gendisk *disk = dev_to_disk(dev);
 +
 +      if (nvme_disk_is_ns_head(disk))
 +              return disk->private_data;
 +      return nvme_get_ns_from_dev(dev)->head;
 +}
 +
  static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev,
                struct device_attribute *attr, char *buf)
  {
 -      struct nvme_ns *n = dev_get_drvdata(dev);
 +      struct nvme_ns_head *head = dev_to_ns_head(dev);
  
 -      return sysfs_emit(buf, n->passthru_err_log_enabled ? "on\n" : "off\n");
 +      return sysfs_emit(buf, head->passthru_err_log_enabled ? "on\n" : "off\n");
  }
  
  static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
  {
 -      struct nvme_ns *ns = dev_get_drvdata(dev);
 -      int err;
 +      struct nvme_ns_head *head = dev_to_ns_head(dev);
        bool passthru_err_log_enabled;
 +      int err;
  
        err = kstrtobool(buf, &passthru_err_log_enabled);
        if (err)
                return -EINVAL;
 -      ns->passthru_err_log_enabled = passthru_err_log_enabled;
 +      head->passthru_err_log_enabled = passthru_err_log_enabled;
  
        return count;
  }
@@@ -100,6 -91,15 +100,6 @@@ static struct device_attribute dev_attr
        __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
        nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store);
  
 -static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 -{
 -      struct gendisk *disk = dev_to_disk(dev);
 -
 -      if (nvme_disk_is_ns_head(disk))
 -              return disk->private_data;
 -      return nvme_get_ns_from_dev(dev)->head;
 -}
 -
  static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
                char *buf)
  {
@@@ -236,8 -236,7 +236,7 @@@ static ssize_t nuse_show(struct device 
        struct block_device *bdev = disk->part0;
        int ret;
  
-       if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
-           bdev->bd_disk->fops == &nvme_ns_head_ops)
+       if (nvme_disk_is_ns_head(bdev->bd_disk))
                ret = ns_head_update_nuse(head);
        else
                ret = ns_update_nuse(bdev->bd_disk->private_data);
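
Both passthru_err_log_enabled store handlers above parse their input with kstrtobool(), which accepts the 1/0, y/n, and on/off spellings, while the show side always emits "on" or "off". A userspace model of that round trip; parse_bool is a simplified stand-in for kstrtobool, not its full implementation:

#include <stdbool.h>
#include <stdio.h>

/* Simplified kstrtobool(): accept 1/0, y/n, on/off (case-insensitive
 * on the first letters), reject anything else with -EINVAL.
 */
static int parse_bool(const char *s, bool *res)
{
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	case 'o': case 'O':
		if (s[1] == 'n' || s[1] == 'N') {
			*res = true;
			return 0;
		}
		if (s[1] == 'f' || s[1] == 'F') {
			*res = false;
			return 0;
		}
		break;
	}
	return -22; /* -EINVAL */
}

int main(void)
{
	bool enabled;

	if (!parse_bool("on", &enabled))
		printf(enabled ? "on\n" : "off\n"); /* show-side spelling */
	return 0;
}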
diff --combined drivers/nvme/host/tcp.c
index 3692b56cb58dbacf53eba56aef841dc50063557a,34a882b2ec53d8e8f626717f9995c4d6d1cb25dd..fdbcdcedcee99f064cc7258d22b7fe737d285eda
@@@ -36,6 -36,14 +36,14 @@@ static int so_priority
  module_param(so_priority, int, 0644);
  MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
  
+ /*
+  * Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity
+  * from sysfs.
+  */
+ static bool wq_unbound;
+ module_param(wq_unbound, bool, 0644);
+ MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)");
+ 
  /*
   * TLS handshake timeout
   */
@@@ -1344,6 -1352,7 +1352,6 @@@ static int nvme_tcp_alloc_async_req(str
  
  static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
  {
 -      struct page *page;
        struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
        struct nvme_tcp_queue *queue = &ctrl->queues[qid];
        unsigned int noreclaim_flag;
        if (queue->hdr_digest || queue->data_digest)
                nvme_tcp_free_crypto(queue);
  
 -      if (queue->pf_cache.va) {
 -              page = virt_to_head_page(queue->pf_cache.va);
 -              __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
 -              queue->pf_cache.va = NULL;
 -      }
 +      page_frag_cache_drain(&queue->pf_cache);
  
        noreclaim_flag = memalloc_noreclaim_save();
        /* ->sock will be released by fput() */
@@@ -1546,7 -1559,10 +1554,10 @@@ static void nvme_tcp_set_queue_io_cpu(s
        else if (nvme_tcp_poll_queue(queue))
                n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
                                ctrl->io_queues[HCTX_TYPE_READ] - 1;
-       queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+       if (wq_unbound)
+               queue->io_cpu = WORK_CPU_UNBOUND;
+       else
+               queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
  }
  
  static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
@@@ -2785,6 -2801,8 +2796,8 @@@ static struct nvmf_transport_ops nvme_t
  
  static int __init nvme_tcp_init_module(void)
  {
+       unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
+ 
        BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
        BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
        BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
        BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
        BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
  
-       nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
-                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+       if (wq_unbound)
+               wq_flags |= WQ_UNBOUND;
+       nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
        if (!nvme_tcp_wq)
                return -ENOMEM;
  
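With the new parameter enabled (it sits at /sys/module/nvme_tcp/parameters/wq_unbound given the 0644 mode), io_work placement is left to the scheduler; otherwise each queue keeps being pinned round-robin across the online CPUs. A rough userspace model of that choice, assuming a contiguous online mask 0..ncpus-1, which the real cpumask walk does not require; WORK_CPU_UNBOUND is modelled here as -1, not the kernel's actual value:

#include <stdio.h>

#define WORK_CPU_UNBOUND	(-1)	/* stand-in for the kernel constant */

/* Rough model of nvme_tcp_set_queue_io_cpu(): with wq_unbound the
 * workqueue picks the CPU; otherwise queue n maps round-robin onto
 * the online CPUs.
 */
static int queue_io_cpu(int n, int ncpus, int wq_unbound)
{
	if (wq_unbound)
		return WORK_CPU_UNBOUND;
	return n % ncpus;
}

int main(void)
{
	for (int q = 0; q < 6; q++)
		printf("queue %d -> cpu %d\n", q, queue_io_cpu(q, 4, 0));
	return 0;
}
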
index 2aa5762e9f50d0d3ae3c6cfceb9cd03ce1ad8df4,022d17bd36bf405946427065de375ad7c620fb93..a5422e2c979addca1f219777b47f61f5817e302a
@@@ -898,6 -898,7 +898,7 @@@ static int nvmet_tcp_handle_icreq(struc
                pr_err("bad nvme-tcp pdu length (%d)\n",
                        le32_to_cpu(icreq->hdr.plen));
                nvmet_tcp_fatal_error(queue);
+               return -EPROTO;
        }
  
        if (icreq->pfv != NVME_TCP_PFV_1_0) {
@@@ -1591,6 -1592,7 +1592,6 @@@ static void nvmet_tcp_free_cmd_data_in_
  
  static void nvmet_tcp_release_queue_work(struct work_struct *w)
  {
 -      struct page *page;
        struct nvmet_tcp_queue *queue =
                container_of(w, struct nvmet_tcp_queue, release_work);
  
        if (queue->hdr_digest || queue->data_digest)
                nvmet_tcp_free_crypto(queue);
        ida_free(&nvmet_tcp_queue_ida, queue->idx);
 -      page = virt_to_head_page(queue->pf_cache.va);
 -      __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
 +      page_frag_cache_drain(&queue->pf_cache);
        kfree(queue);
  }
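
Finally, the nvmet-tcp fix above makes nvmet_tcp_handle_icreq() return -EPROTO after raising the fatal error, instead of falling through and parsing the rest of a malformed ICReq PDU (whose length must be 128 bytes, matching the BUILD_BUG_ON on struct nvme_tcp_icreq_pdu seen earlier in this diff). A minimal sketch of the check-and-bail pattern; handle_icreq here is illustrative, not the driver function:

#include <stdint.h>
#include <stdio.h>

#define EPROTO		71
#define ICREQ_PDU_LEN	128	/* sizeof(struct nvme_tcp_icreq_pdu) */

/* Once the length check fails and the connection is marked fatal,
 * the handler must stop rather than keep parsing the bogus PDU.
 */
static int handle_icreq(uint32_t plen)
{
	if (plen != ICREQ_PDU_LEN) {
		fprintf(stderr, "bad nvme-tcp pdu length (%u)\n", plen);
		return -EPROTO;	/* the added early return */
	}
	/* ... validate pfv, hpda, digests, then send ICResp ... */
	return 0;
}

int main(void)
{
	printf("%d\n", handle_icreq(128));	/* 0 */
	printf("%d\n", handle_icreq(13));	/* -71 */
	return 0;
}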