for-6.13/block-20241118
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmc7S40QHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpjHVD/43rDZ8ehs+IAAr6S0RemNX1SRG0mK2UOEb
kMoNogS7StO/c4JYW3JuzCyLRn5ZsgeWV/muqxwDEWQrmTGrvi+V45KikrZPwm3k
p0ump33qV9EU2jiR1MKZjtwK2P0CI7/DD3W8ww6IOvKbTT7RcqQcdHznvXArFBtc
xCuQPpayFG7ZasC+N9VaBwtiUEVgU3Ek9AFT7UVZRWajjHPNalQwaooJWayO0rEG
KdoW5yG0ryLrgCY2ACSvRLS+2s14EJtb8hgT08WKHTNgd5LxhSKxfsTapamua+7U
FdVS6Ij0tEkgu2jpvgj7QKO0Uw10Cnep2gj7RHts/LVewvkliS6XcheOzqRS1jWU
I2EI+UaGOZ11OUiw52VIveEVS5zV/NWhgy5BSP9LYEvXw0BUAHRDYGMem8o5G1V1
SWqjIM1UWvcQDlAnMF9FDVzojvjVUmYWvcAlFFztO8J0B7SavHR3NcfHwEf57reH
rNoUbi/9c4/wjJJF33gejiR5pU+ewy/Mk75GrtX3xpEqlztfRbf9/FbPCMEAO1KR
DF/b3lkUV9i2/BRW6a0SpZ5RDSmSYMnateel6TrPyVSRnpiSSFO8FrbynwUOa17b
6i49YDFWzzXOrR1YWDg6IEtTrcmBEmvi7F6aoDs020qUnL0hwLn1ZuoIxuiFEpor
Z0iFF1B/nw==
=PWTH
-----END PGP SIGNATURE-----

Merge tag 'for-6.13/block-20241118' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - NVMe updates via Keith:
     - Use uring_cmd helper (Pavel)
     - Host Memory Buffer allocation enhancements (Christoph)
     - Target persistent reservation support (Guixin)
     - Persistent reservation tracing (Guixin)
     - NVMe 2.1 specification support (Keith)
     - Rotational Meta Support (Matias, Wang, Keith)
     - Volatile cache detection enhancement (Guixin)

 - MD updates via Song:
     - Maintainers update
     - raid5 sync IO fix
     - Enhance handling of faulty and blocked devices
     - raid5-ppl atomic improvement
     - md-bitmap fix

 - Support for manually defining embedded partition tables

 - Zone append fixes and cleanups

 - Stop sending the queued requests in the plug list to the driver
   ->queue_rqs() handler in reverse order

 - Zoned write plug cleanups

 - Clean up disk stats tracking and add support for disk stats for
   passthrough IO

 - Add preparatory support for file system atomic writes

 - Add lockdep support for queue freezing. Already found a bunch of
   issues, and some fixes for that are in here. More will be coming.

 - Fix race between queue stopping/quiescing and IO queueing

 - ublk recovery improvements

 - Fix ublk mmap for 64k pages

 - Various fixes and cleanups

* tag 'for-6.13/block-20241118' of git://git.kernel.dk/linux: (118 commits)
  MAINTAINERS: Update git tree for mdraid subsystem
  block: make struct rq_list available for !CONFIG_BLOCK
  block/genhd: use seq_put_decimal_ull for diskstats decimal values
  block: don't reorder requests in blk_mq_add_to_batch
  block: don't reorder requests in blk_add_rq_to_plug
  block: add a rq_list type
  block: remove rq_list_move
  virtio_blk: reverse request order in virtio_queue_rqs
  nvme-pci: reverse request order in nvme_queue_rqs
  btrfs: validate queue limits
  block: export blk_validate_limits
  nvmet: add tracing of reservation commands
  nvme: parse reservation commands's action and rtype to string
  nvmet: report ns's vwc not present
  md/raid5: Increase r5conf.cache_name size
  block: remove the ioprio field from struct request
  block: remove the write_hint field from struct request
  nvme: check ns's volatile write cache not present
  nvme: add rotational support
  nvme: use command set independent id ns if available
  ...
This commit is contained in: commit 77a0cfafa9
Documentation/ABI/stable/sysfs-block

@@ -424,6 +424,13 @@ Description:
 		[RW] This file is used to control (on/off) the iostats
 		accounting of the disk.
 
+What:		/sys/block/<disk>/queue/iostats_passthrough
+Date:		October 2024
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RW] This file is used to control (on/off) the iostats
+		accounting of the disk for passthrough commands.
+
 What:		/sys/block/<disk>/queue/logical_block_size
 Date:		May 2009
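Usage note (not part of the patch): like the existing iostats attribute, the new file is a boolean knob, so enabling passthrough accounting from user space is a one-byte write. A minimal C sketch, with "sda" standing in for an arbitrary disk name:

/*
 * Illustrative only: toggle /sys/block/<disk>/queue/iostats_passthrough.
 * The disk name "sda" is an example, not taken from the patch.
 */
#include <fcntl.h>
#include <unistd.h>

static int enable_passthrough_iostats(void)
{
	int fd = open("/sys/block/sda/queue/iostats_passthrough", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, "1", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}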
Documentation/block/cmdline-partition.rst

@@ -39,13 +39,16 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
     create a link to block device partition with the name "PARTNAME".
     User space application can access partition by partition name.
 
+ro
+    read-only. Flag the partition as read-only.
+
 Example:
 
     eMMC disk names are "mmcblk0" and "mmcblk0boot0".
 
     bootargs::
 
-        'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
+        'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'
 
     dmesg::
Documentation/block/ublk.rst

@@ -199,24 +199,36 @@ managing and controlling ublk devices with help of several control commands:
 
 - user recovery feature description
 
-  Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
-  ``UBLK_F_USER_RECOVERY_REISSUE``.
+  Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
+  ``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
+  enable recovery of ublk devices after the ublk server exits, the ublk server
+  should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
+  ublk server may additionally specify at most one of
+  ``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
+  modify how I/O is handled while the ublk server is dying/dead (this is called
+  the ``nosrv`` case in the driver code).
 
-  With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
+  With just ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
   handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
   recovery stage and ublk device ID is kept. It is ublk server's
   responsibility to recover the device context by its own knowledge.
   Requests which have not been issued to userspace are requeued. Requests
   which have been issued to userspace are aborted.
 
-  With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
-  server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
+  With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after one ubq_daemon
+  (ublk server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
   requests which have been issued to userspace are requeued and will be
   re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
   ``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
   double-write since the driver may issue the same I/O request twice. It
   might be useful to a read-only FS or a VM backend.
 
+  With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
+  exits, requests which have issued to userspace are failed, as are any
+  subsequently issued requests. Applications continuously issuing I/O against
+  devices with this flag set will see a stream of I/O errors until a new ublk
+  server recovers the device.
+
 Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
 Once the flag is set, all control commands can be sent by unprivileged
 user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
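For illustration, a minimal sketch of how a ublk server might combine these flags when adding a device. The policy enum and helper below are hypothetical; only the UBLK_F_* flags come from the documentation above, and UBLK_F_USER_RECOVERY_FAIL_IO assumes a uapi header that already contains this merge.

#include <linux/ublk_cmd.h>
#include <stdint.h>

enum nosrv_policy {
	NOSRV_ABORT,	/* default: abort in-flight I/O, requeue the rest */
	NOSRV_REQUEUE,	/* re-issue in-flight I/O after recovery */
	NOSRV_FAIL_IO,	/* fail all I/O until a new server recovers */
};

static uint64_t ublk_recovery_flags(enum nosrv_policy policy)
{
	uint64_t flags = UBLK_F_USER_RECOVERY;

	/* At most one of the two modifier flags may be combined with it. */
	if (policy == NOSRV_REQUEUE)
		flags |= UBLK_F_USER_RECOVERY_REISSUE;
	else if (policy == NOSRV_FAIL_IO)
		flags |= UBLK_F_USER_RECOVERY_FAIL_IO;
	return flags;
}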
Documentation/devicetree/bindings/mmc/mmc-card.yaml

@@ -13,6 +13,10 @@ description: |
   This documents describes the devicetree bindings for a mmc-host controller
   child node describing a mmc-card / an eMMC.
 
+  It's possible to define a fixed partition table for an eMMC for the user
+  partition, the 2 BOOT partition (boot1/2) and the 4 GP (gp1/2/3/4) if supported
+  by the eMMC.
+
 properties:
   compatible:
     const: mmc-card
@@ -26,6 +30,24 @@ properties:
       Use this to indicate that the mmc-card has a broken hpi
       implementation, and that hpi should not be used.
 
+patternProperties:
+  "^partitions(-boot[12]|-gp[14])?$":
+    $ref: /schemas/mtd/partitions/partitions.yaml
+
+    patternProperties:
+      "^partition@[0-9a-f]+$":
+        $ref: /schemas/mtd/partitions/partition.yaml
+
+        properties:
+          reg:
+            description: Must be multiple of 512 as it's converted
+              internally from bytes to SECTOR_SIZE (512 bytes)
+
+        required:
+          - reg
+
+        unevaluatedProperties: false
+
 required:
   - compatible
   - reg
@@ -42,6 +64,36 @@ examples:
         compatible = "mmc-card";
         reg = <0>;
         broken-hpi;
+
+        partitions {
+            compatible = "fixed-partitions";
+
+            #address-cells = <1>;
+            #size-cells = <1>;
+
+            partition@0 {
+                label = "kernel"; /* Kernel */
+                reg = <0x0 0x2000000>; /* 32 MB */
+            };
+
+            partition@2000000 {
+                label = "rootfs";
+                reg = <0x2000000 0x40000000>; /* 1GB */
+            };
+        };
+
+        partitions-boot1 {
+            compatible = "fixed-partitions";
+
+            #address-cells = <1>;
+            #size-cells = <1>;
+
+            partition@0 {
+                label = "bl";
+                reg = <0x0 0x2000000>; /* 32MB */
+                read-only;
+            };
+        };
     };
 };
MAINTAINERS

@@ -21393,11 +21393,11 @@ F:	include/linux/property.h
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
 M:	Song Liu <song@kernel.org>
-R:	Yu Kuai <yukuai3@huawei.com>
+M:	Yu Kuai <yukuai3@huawei.com>
 L:	linux-raid@vger.kernel.org
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-raid/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/song/md.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux.git
 F:	drivers/md/Kconfig
 F:	drivers/md/Makefile
 F:	drivers/md/md*
block/bio-integrity.c

@@ -199,7 +199,7 @@ EXPORT_SYMBOL(bio_integrity_add_page);
 
 static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
 				   int nr_vecs, unsigned int len,
-				   unsigned int direction, u32 seed)
+				   unsigned int direction)
 {
 	bool write = direction == ITER_SOURCE;
 	struct bio_integrity_payload *bip;
@@ -247,7 +247,6 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
 	}
 
 	bip->bip_flags |= BIP_COPY_USER;
-	bip->bip_iter.bi_sector = seed;
 	bip->bip_vcnt = nr_vecs;
 	return 0;
 free_bip:
@@ -258,7 +257,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
 }
 
 static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
-				   int nr_vecs, unsigned int len, u32 seed)
+				   int nr_vecs, unsigned int len)
 {
 	struct bio_integrity_payload *bip;
 
@@ -267,7 +266,6 @@ static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
 		return PTR_ERR(bip);
 
 	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
-	bip->bip_iter.bi_sector = seed;
 	bip->bip_iter.bi_size = len;
 	bip->bip_vcnt = nr_vecs;
 	return 0;
@@ -303,8 +301,7 @@ static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
 	return nr_bvecs;
 }
 
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
-			   u32 seed)
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
@@ -350,9 +347,9 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
 
 	if (copy)
 		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
-					      direction, seed);
+					      direction);
 	else
-		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
+		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes);
 	if (ret)
 		goto release_pages;
 	if (bvec != stack_vec)
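A sketch of what a caller looks like after the seed argument removal: the metadata mapping helpers now take only the user buffer and its length. The wrapper itself is hypothetical; only the blk_rq_integrity_map_user() signature comes from this diff.

#include <linux/blk-integrity.h>

static int example_map_user_metadata(struct request *rq,
				     void __user *meta_buf, ssize_t meta_len)
{
	if (!meta_len)
		return 0;
	/* Previously: blk_rq_integrity_map_user(rq, meta_buf, meta_len, seed) */
	return blk_rq_integrity_map_user(rq, meta_buf, meta_len);
}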
block/bio.c
@@ -1064,39 +1064,6 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL(bio_add_pc_page);
 
-/**
- * bio_add_zone_append_page - attempt to add page to zone-append bio
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- *
- * Attempt to add a page to the bio_vec maplist of a bio that will be submitted
- * for a zone-append request. This can fail for a number of reasons, such as the
- * bio being full or the target block device is not a zoned block device or
- * other limitations of the target block device. The target block device must
- * allow bio's up to PAGE_SIZE, so it is always possible to add a single page
- * to an empty bio.
- *
- * Returns: number of bytes added to the bio, or 0 in case of a failure.
- */
-int bio_add_zone_append_page(struct bio *bio, struct page *page,
-			     unsigned int len, unsigned int offset)
-{
-	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	bool same_page = false;
-
-	if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
-		return 0;
-
-	if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev)))
-		return 0;
-
-	return bio_add_hw_page(q, bio, page, len, offset,
-			queue_max_zone_append_sectors(q), &same_page);
-}
-EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
-
 /**
  * __bio_add_page - add page(s) to a bio in a new segment
  * @bio: destination bio
@@ -1206,21 +1173,12 @@ EXPORT_SYMBOL_GPL(__bio_release_pages);
 
 void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 {
-	size_t size = iov_iter_count(iter);
-
 	WARN_ON_ONCE(bio->bi_max_vecs);
 
-	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-		size_t max_sectors = queue_max_zone_append_sectors(q);
-
-		size = min(size, max_sectors << SECTOR_SHIFT);
-	}
-
 	bio->bi_vcnt = iter->nr_segs;
 	bio->bi_io_vec = (struct bio_vec *)iter->bvec;
 	bio->bi_iter.bi_bvec_done = iter->iov_offset;
-	bio->bi_iter.bi_size = size;
+	bio->bi_iter.bi_size = iov_iter_count(iter);
 	bio_set_flag(bio, BIO_CLONED);
 }
 
@@ -1245,20 +1203,6 @@ static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len,
 	return 0;
 }
 
-static int bio_iov_add_zone_append_folio(struct bio *bio, struct folio *folio,
-					  size_t len, size_t offset)
-{
-	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	bool same_page = false;
-
-	if (bio_add_hw_folio(q, bio, folio, len, offset,
-			queue_max_zone_append_sectors(q), &same_page) != len)
-		return -EINVAL;
-	if (same_page && bio_flagged(bio, BIO_PAGE_PINNED))
-		unpin_user_folio(folio, 1);
-	return 0;
-}
-
 static unsigned int get_contig_folio_len(unsigned int *num_pages,
 					 struct page **pages, unsigned int i,
 					 struct folio *folio, size_t left,
@@ -1365,14 +1309,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 			len = get_contig_folio_len(&num_pages, pages, i,
 						   folio, left, offset);
 
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-			ret = bio_iov_add_zone_append_folio(bio, folio, len,
-					folio_offset);
-			if (ret)
-				break;
-		} else
-			bio_iov_add_folio(bio, folio, len, folio_offset);
-
+		bio_iov_add_folio(bio, folio, len, folio_offset);
 		offset = 0;
 	}
 
@@ -1728,16 +1665,22 @@ struct bio *bio_split(struct bio *bio, int sectors,
 {
 	struct bio *split;
 
-	BUG_ON(sectors <= 0);
-	BUG_ON(sectors >= bio_sectors(bio));
+	if (WARN_ON_ONCE(sectors <= 0))
+		return ERR_PTR(-EINVAL);
+	if (WARN_ON_ONCE(sectors >= bio_sectors(bio)))
+		return ERR_PTR(-EINVAL);
 
 	/* Zone append commands cannot be split */
 	if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
-		return NULL;
+		return ERR_PTR(-EINVAL);
+
+	/* atomic writes cannot be split */
+	if (bio->bi_opf & REQ_ATOMIC)
+		return ERR_PTR(-EINVAL);
 
 	split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs);
 	if (!split)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	split->bi_iter.bi_size = sectors << 9;
 
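Since bio_split() now reports failures with ERR_PTR() codes instead of NULL, callers are expected to check IS_ERR(). A minimal caller sketch under that assumption; the helper name is made up, and the error handling mirrors the updated blk-crypto-fallback and bio_submit_split() callers below.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/err.h>

static bool example_split_front(struct bio *bio, int sectors,
				struct bio_set *bs)
{
	struct bio *split;

	split = bio_split(bio, sectors, GFP_NOIO, bs);
	if (IS_ERR(split)) {
		/* e.g. -EINVAL for unsplittable bios, -ENOMEM for clone failure */
		bio->bi_status = errno_to_blk_status(PTR_ERR(split));
		bio_endio(bio);
		return false;
	}

	bio_chain(split, bio);
	submit_bio_noacct(split);
	return true;
}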
block/blk-core.c

@@ -261,6 +261,8 @@ static void blk_free_queue(struct request_queue *q)
 		blk_mq_release(q);
 
 	ida_free(&blk_queue_ida, q->id);
+	lockdep_unregister_key(&q->io_lock_cls_key);
+	lockdep_unregister_key(&q->q_lock_cls_key);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
@@ -278,18 +280,20 @@ void blk_put_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_put_queue);
 
-void blk_queue_start_drain(struct request_queue *q)
+bool blk_queue_start_drain(struct request_queue *q)
 {
 	/*
 	 * When queue DYING flag is set, we need to block new req
 	 * entering queue, so we call blk_freeze_queue_start() to
 	 * prevent I/O from crossing blk_queue_enter().
 	 */
-	blk_freeze_queue_start(q);
+	bool freeze = __blk_freeze_queue_start(q, current);
 	if (queue_is_mq(q))
 		blk_mq_wake_waiters(q);
 	/* Make blk_queue_enter() reexamine the DYING flag. */
 	wake_up_all(&q->mq_freeze_wq);
+
+	return freeze;
 }
 
 /**
@@ -321,6 +325,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 			return -ENODEV;
 	}
 
+	rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_);
+	rwsem_release(&q->q_lockdep_map, _RET_IP_);
 	return 0;
 }
 
@@ -352,6 +358,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
 			goto dead;
 	}
 
+	rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
+	rwsem_release(&q->io_lockdep_map, _RET_IP_);
 	return 0;
 dead:
 	bio_io_error(bio);
@@ -441,6 +449,12 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (error)
 		goto fail_stats;
+	lockdep_register_key(&q->io_lock_cls_key);
+	lockdep_register_key(&q->q_lock_cls_key);
+	lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)",
+			 &q->io_lock_cls_key, 0);
+	lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)",
+			 &q->q_lock_cls_key, 0);
 
 	q->nr_requests = BLKDEV_DEFAULT_RQ;
 
@@ -593,7 +607,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 		return BLK_STS_IOERR;
 
 	/* Make sure the BIO is small enough and will not get split */
-	if (nr_sectors > queue_max_zone_append_sectors(q))
+	if (nr_sectors > q->limits.max_zone_append_sectors)
 		return BLK_STS_IOERR;
 
 	bio->bi_opf |= REQ_NOMERGE;
@@ -1106,8 +1120,8 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
 		return;
 
 	plug->cur_ktime = 0;
-	plug->mq_list = NULL;
-	plug->cached_rq = NULL;
+	rq_list_init(&plug->mq_list);
+	rq_list_init(&plug->cached_rqs);
 	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
@@ -1203,7 +1217,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 	 * queue for cached requests, we don't want a blocked task holding
 	 * up a queue freeze/quiesce event.
 	 */
-	if (unlikely(!rq_list_empty(plug->cached_rq)))
+	if (unlikely(!rq_list_empty(&plug->cached_rqs)))
 		blk_mq_free_plug_rqs(plug);
 
 	plug->cur_ktime = 0;
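The lockdep annotations added above follow a common pattern: register a dynamic key, initialise a fake rwsem map with it, and take/drop a read dependency at the points that may block on a freeze, so lockdep can report inverted freeze/lock ordering. A generic sketch of that pattern under those assumptions; the example_* names are hypothetical and not from the patch.

#include <linux/lockdep.h>

struct example_gate {
	struct lockdep_map	map;
	struct lock_class_key	key;
};

static void example_gate_init(struct example_gate *g, const char *name)
{
	lockdep_register_key(&g->key);
	lockdep_init_map(&g->map, name, &g->key, 0);
}

static void example_gate_enter(struct example_gate *g)
{
	/* Tell lockdep we may wait here, then immediately drop the dep. */
	rwsem_acquire_read(&g->map, 0, 0, _RET_IP_);
	rwsem_release(&g->map, _RET_IP_);
}

static void example_gate_destroy(struct example_gate *g)
{
	lockdep_unregister_key(&g->key);
}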
|
block/blk-crypto-fallback.c

@@ -226,7 +226,7 @@ static bool blk_crypto_fallback_split_bio_if_needed(struct bio **bio_ptr)
 
 		split_bio = bio_split(bio, num_sectors, GFP_NOIO,
 				      &crypto_bio_split);
-		if (!split_bio) {
+		if (IS_ERR(split_bio)) {
 			bio->bi_status = BLK_STS_RESOURCE;
 			return false;
 		}
|
block/blk-integrity.c

@@ -113,9 +113,9 @@ int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
 EXPORT_SYMBOL(blk_rq_map_integrity_sg);
 
 int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
-			      ssize_t bytes, u32 seed)
+			      ssize_t bytes)
 {
-	int ret = bio_integrity_map_user(rq->bio, ubuf, bytes, seed);
+	int ret = bio_integrity_map_user(rq->bio, ubuf, bytes);
 
 	if (ret)
 		return ret;
|
block/blk-ioc.c

@@ -32,13 +32,6 @@ static void get_io_context(struct io_context *ioc)
 	atomic_long_inc(&ioc->refcount);
 }
 
-static void icq_free_icq_rcu(struct rcu_head *head)
-{
-	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
-
-	kmem_cache_free(icq->__rcu_icq_cache, icq);
-}
-
 /*
  * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
  * and queue locked for legacy.
@@ -102,7 +95,7 @@ static void ioc_destroy_icq(struct io_cq *icq)
 	 */
 	icq->__rcu_icq_cache = et->icq_cache;
 	icq->flags |= ICQ_DESTROYED;
-	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
+	kfree_rcu(icq, __rcu_head);
 }
 
 /*
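The blk-ioc change is an instance of a general simplification: when an RCU callback does nothing but free the object, kfree_rcu() can replace call_rcu() plus a hand-written callback. A small self-contained sketch with a made-up struct:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_obj {
	int value;
	struct rcu_head rcu;
};

static void example_obj_release(struct example_obj *obj)
{
	/* Frees obj after a grace period; no custom callback needed. */
	kfree_rcu(obj, rcu);
}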
|
@ -107,17 +107,18 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
|
||||
|
||||
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
|
||||
{
|
||||
if (unlikely(split_sectors < 0)) {
|
||||
bio->bi_status = errno_to_blk_status(split_sectors);
|
||||
bio_endio(bio);
|
||||
return NULL;
|
||||
}
|
||||
if (unlikely(split_sectors < 0))
|
||||
goto error;
|
||||
|
||||
if (split_sectors) {
|
||||
struct bio *split;
|
||||
|
||||
split = bio_split(bio, split_sectors, GFP_NOIO,
|
||||
&bio->bi_bdev->bd_disk->bio_split);
|
||||
if (IS_ERR(split)) {
|
||||
split_sectors = PTR_ERR(split);
|
||||
goto error;
|
||||
}
|
||||
split->bi_opf |= REQ_NOMERGE;
|
||||
blkcg_bio_issue_init(split);
|
||||
bio_chain(split, bio);
|
||||
@ -128,6 +129,10 @@ static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
|
||||
}
|
||||
|
||||
return bio;
|
||||
error:
|
||||
bio->bi_status = errno_to_blk_status(split_sectors);
|
||||
bio_endio(bio);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
|
||||
@ -166,17 +171,6 @@ struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
|
||||
return bio_submit_split(bio, split_sectors);
|
||||
}
|
||||
|
||||
struct bio *bio_split_write_zeroes(struct bio *bio,
|
||||
const struct queue_limits *lim, unsigned *nsegs)
|
||||
{
|
||||
*nsegs = 0;
|
||||
if (!lim->max_write_zeroes_sectors)
|
||||
return bio;
|
||||
if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
|
||||
return bio;
|
||||
return bio_submit_split(bio, lim->max_write_zeroes_sectors);
|
||||
}
|
||||
|
||||
static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
|
||||
bool is_atomic)
|
||||
{
|
||||
@ -211,7 +205,9 @@ static inline unsigned get_max_io_size(struct bio *bio,
|
||||
* We ignore lim->max_sectors for atomic writes because it may less
|
||||
* than the actual bio size, which we cannot tolerate.
|
||||
*/
|
||||
if (is_atomic)
|
||||
if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
|
||||
max_sectors = lim->max_write_zeroes_sectors;
|
||||
else if (is_atomic)
|
||||
max_sectors = lim->atomic_write_max_sectors;
|
||||
else
|
||||
max_sectors = lim->max_sectors;
|
||||
@ -296,6 +292,14 @@ static bool bvec_split_segs(const struct queue_limits *lim,
|
||||
return len > 0 || bv->bv_len > max_len;
|
||||
}
|
||||
|
||||
static unsigned int bio_split_alignment(struct bio *bio,
|
||||
const struct queue_limits *lim)
|
||||
{
|
||||
if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
|
||||
return lim->zone_write_granularity;
|
||||
return lim->logical_block_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_split_rw_at - check if and where to split a read/write bio
|
||||
* @bio: [in] bio to be split
|
||||
@ -358,7 +362,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
|
||||
* split size so that each bio is properly block size aligned, even if
|
||||
* we do not use the full hardware limits.
|
||||
*/
|
||||
bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
|
||||
bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
|
||||
|
||||
/*
|
||||
* Bio splitting may cause subtle trouble such as hang when doing sync
|
||||
@ -388,16 +392,35 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
|
||||
struct bio *bio_split_zone_append(struct bio *bio,
|
||||
const struct queue_limits *lim, unsigned *nr_segs)
|
||||
{
|
||||
unsigned int max_sectors = queue_limits_max_zone_append_sectors(lim);
|
||||
int split_sectors;
|
||||
|
||||
split_sectors = bio_split_rw_at(bio, lim, nr_segs,
|
||||
max_sectors << SECTOR_SHIFT);
|
||||
lim->max_zone_append_sectors << SECTOR_SHIFT);
|
||||
if (WARN_ON_ONCE(split_sectors > 0))
|
||||
split_sectors = -EINVAL;
|
||||
return bio_submit_split(bio, split_sectors);
|
||||
}
|
||||
|
||||
struct bio *bio_split_write_zeroes(struct bio *bio,
|
||||
const struct queue_limits *lim, unsigned *nsegs)
|
||||
{
|
||||
unsigned int max_sectors = get_max_io_size(bio, lim);
|
||||
|
||||
*nsegs = 0;
|
||||
|
||||
/*
|
||||
* An unset limit should normally not happen, as bio submission is keyed
|
||||
* off having a non-zero limit. But SCSI can clear the limit in the
|
||||
* I/O completion handler, and we can race and see this. Splitting to a
|
||||
* zero limit obviously doesn't make sense, so band-aid it here.
|
||||
*/
|
||||
if (!max_sectors)
|
||||
return bio;
|
||||
if (bio_sectors(bio) <= max_sectors)
|
||||
return bio;
|
||||
return bio_submit_split(bio, max_sectors);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_split_to_limits - split a bio to fit the queue limits
|
||||
* @bio: bio to be split
|
||||
@ -411,10 +434,9 @@ struct bio *bio_split_zone_append(struct bio *bio,
|
||||
*/
|
||||
struct bio *bio_split_to_limits(struct bio *bio)
|
||||
{
|
||||
const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
|
||||
unsigned int nr_segs;
|
||||
|
||||
return __bio_split_to_limits(bio, lim, &nr_segs);
|
||||
return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_split_to_limits);
|
||||
|
||||
@ -797,7 +819,7 @@ static inline void blk_update_mixed_merge(struct request *req,
|
||||
|
||||
static void blk_account_io_merge_request(struct request *req)
|
||||
{
|
||||
if (blk_do_io_stat(req)) {
|
||||
if (req->rq_flags & RQF_IO_STAT) {
|
||||
part_stat_lock();
|
||||
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
||||
part_stat_local_dec(req->part,
|
||||
@ -845,12 +867,13 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
if (rq_data_dir(req) != rq_data_dir(next))
|
||||
return NULL;
|
||||
|
||||
/* Don't merge requests with different write hints. */
|
||||
if (req->write_hint != next->write_hint)
|
||||
return NULL;
|
||||
|
||||
if (req->ioprio != next->ioprio)
|
||||
return NULL;
|
||||
if (req->bio && next->bio) {
|
||||
/* Don't merge requests with different write hints. */
|
||||
if (req->bio->bi_write_hint != next->bio->bi_write_hint)
|
||||
return NULL;
|
||||
if (req->bio->bi_ioprio != next->bio->bi_ioprio)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!blk_atomic_write_mergeable_rqs(req, next))
|
||||
return NULL;
|
||||
@ -979,12 +1002,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
||||
if (!bio_crypt_rq_ctx_compatible(rq, bio))
|
||||
return false;
|
||||
|
||||
/* Don't merge requests with different write hints. */
|
||||
if (rq->write_hint != bio->bi_write_hint)
|
||||
return false;
|
||||
|
||||
if (rq->ioprio != bio_prio(bio))
|
||||
return false;
|
||||
if (rq->bio) {
|
||||
/* Don't merge requests with different write hints. */
|
||||
if (rq->bio->bi_write_hint != bio->bi_write_hint)
|
||||
return false;
|
||||
if (rq->bio->bi_ioprio != bio->bi_ioprio)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
|
||||
return false;
|
||||
@ -1005,12 +1029,11 @@ enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
|
||||
|
||||
static void blk_account_io_merge_bio(struct request *req)
|
||||
{
|
||||
if (!blk_do_io_stat(req))
|
||||
return;
|
||||
|
||||
part_stat_lock();
|
||||
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
if (req->rq_flags & RQF_IO_STAT) {
|
||||
part_stat_lock();
|
||||
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
enum bio_merge_status bio_attempt_back_merge(struct request *req,
|
||||
@ -1156,7 +1179,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
struct blk_plug *plug = current->plug;
|
||||
struct request *rq;
|
||||
|
||||
if (!plug || rq_list_empty(plug->mq_list))
|
||||
if (!plug || rq_list_empty(&plug->mq_list))
|
||||
return false;
|
||||
|
||||
rq_list_for_each(&plug->mq_list, rq) {
|
||||
|
block/blk-mq.c
@ -92,7 +92,7 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv)
|
||||
{
|
||||
struct mq_inflight *mi = priv;
|
||||
|
||||
if (rq->part && blk_do_io_stat(rq) &&
|
||||
if (rq->rq_flags & RQF_IO_STAT &&
|
||||
(!bdev_is_partition(mi->part) || rq->part == mi->part) &&
|
||||
blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
|
||||
mi->inflight[rq_data_dir(rq)]++;
|
||||
@ -120,9 +120,59 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
|
||||
inflight[1] = mi.inflight[1];
|
||||
}
|
||||
|
||||
void blk_freeze_queue_start(struct request_queue *q)
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
static bool blk_freeze_set_owner(struct request_queue *q,
|
||||
struct task_struct *owner)
|
||||
{
|
||||
if (!owner)
|
||||
return false;
|
||||
|
||||
if (!q->mq_freeze_depth) {
|
||||
q->mq_freeze_owner = owner;
|
||||
q->mq_freeze_owner_depth = 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (owner == q->mq_freeze_owner)
|
||||
q->mq_freeze_owner_depth += 1;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* verify the last unfreeze in owner context */
|
||||
static bool blk_unfreeze_check_owner(struct request_queue *q)
|
||||
{
|
||||
if (!q->mq_freeze_owner)
|
||||
return false;
|
||||
if (q->mq_freeze_owner != current)
|
||||
return false;
|
||||
if (--q->mq_freeze_owner_depth == 0) {
|
||||
q->mq_freeze_owner = NULL;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static bool blk_freeze_set_owner(struct request_queue *q,
|
||||
struct task_struct *owner)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool blk_unfreeze_check_owner(struct request_queue *q)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool __blk_freeze_queue_start(struct request_queue *q,
|
||||
struct task_struct *owner)
|
||||
{
|
||||
bool freeze;
|
||||
|
||||
mutex_lock(&q->mq_freeze_lock);
|
||||
freeze = blk_freeze_set_owner(q, owner);
|
||||
if (++q->mq_freeze_depth == 1) {
|
||||
percpu_ref_kill(&q->q_usage_counter);
|
||||
mutex_unlock(&q->mq_freeze_lock);
|
||||
@ -131,6 +181,14 @@ void blk_freeze_queue_start(struct request_queue *q)
|
||||
} else {
|
||||
mutex_unlock(&q->mq_freeze_lock);
|
||||
}
|
||||
|
||||
return freeze;
|
||||
}
|
||||
|
||||
void blk_freeze_queue_start(struct request_queue *q)
|
||||
{
|
||||
if (__blk_freeze_queue_start(q, current))
|
||||
blk_freeze_acquire_lock(q, false, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
|
||||
|
||||
@ -149,35 +207,17 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
|
||||
|
||||
/*
|
||||
* Guarantee no request is in use, so we can change any data structure of
|
||||
* the queue afterward.
|
||||
*/
|
||||
void blk_freeze_queue(struct request_queue *q)
|
||||
void blk_mq_freeze_queue(struct request_queue *q)
|
||||
{
|
||||
/*
|
||||
* In the !blk_mq case we are only calling this to kill the
|
||||
* q_usage_counter, otherwise this increases the freeze depth
|
||||
* and waits for it to return to zero. For this reason there is
|
||||
* no blk_unfreeze_queue(), and blk_freeze_queue() is not
|
||||
* exported to drivers as the only user for unfreeze is blk_mq.
|
||||
*/
|
||||
blk_freeze_queue_start(q);
|
||||
blk_mq_freeze_queue_wait(q);
|
||||
}
|
||||
|
||||
void blk_mq_freeze_queue(struct request_queue *q)
|
||||
{
|
||||
/*
|
||||
* ...just an alias to keep freeze and unfreeze actions balanced
|
||||
* in the blk_mq_* namespace
|
||||
*/
|
||||
blk_freeze_queue(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
|
||||
|
||||
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
|
||||
bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
|
||||
{
|
||||
bool unfreeze;
|
||||
|
||||
mutex_lock(&q->mq_freeze_lock);
|
||||
if (force_atomic)
|
||||
q->q_usage_counter.data->force_atomic = true;
|
||||
@ -187,15 +227,39 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
|
||||
percpu_ref_resurrect(&q->q_usage_counter);
|
||||
wake_up_all(&q->mq_freeze_wq);
|
||||
}
|
||||
unfreeze = blk_unfreeze_check_owner(q);
|
||||
mutex_unlock(&q->mq_freeze_lock);
|
||||
|
||||
return unfreeze;
|
||||
}
|
||||
|
||||
void blk_mq_unfreeze_queue(struct request_queue *q)
|
||||
{
|
||||
__blk_mq_unfreeze_queue(q, false);
|
||||
if (__blk_mq_unfreeze_queue(q, false))
|
||||
blk_unfreeze_release_lock(q, false, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
|
||||
|
||||
/*
|
||||
* non_owner variant of blk_freeze_queue_start
|
||||
*
|
||||
* Unlike blk_freeze_queue_start, the queue doesn't need to be unfrozen
|
||||
* by the same task. This is fragile and should not be used if at all
|
||||
* possible.
|
||||
*/
|
||||
void blk_freeze_queue_start_non_owner(struct request_queue *q)
|
||||
{
|
||||
__blk_freeze_queue_start(q, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_freeze_queue_start_non_owner);
|
||||
|
||||
/* non_owner variant of blk_mq_unfreeze_queue */
|
||||
void blk_mq_unfreeze_queue_non_owner(struct request_queue *q)
|
||||
{
|
||||
__blk_mq_unfreeze_queue(q, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_non_owner);
|
||||
|
||||
/*
|
||||
* FIXME: replace the scsi_internal_device_*block_nowait() calls in the
|
||||
* mpt3sas driver such that this function can be removed.
|
||||
@ -283,8 +347,9 @@ void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set)
|
||||
if (!blk_queue_skip_tagset_quiesce(q))
|
||||
blk_mq_quiesce_queue_nowait(q);
|
||||
}
|
||||
blk_mq_wait_quiesce_done(set);
|
||||
mutex_unlock(&set->tag_list_lock);
|
||||
|
||||
blk_mq_wait_quiesce_done(set);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset);
|
||||
|
||||
@ -331,14 +396,9 @@ EXPORT_SYMBOL(blk_rq_init);
|
||||
/* Set start and alloc time when the allocated request is actually used */
|
||||
static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
|
||||
{
|
||||
if (blk_mq_need_time_stamp(rq))
|
||||
rq->start_time_ns = blk_time_get_ns();
|
||||
else
|
||||
rq->start_time_ns = 0;
|
||||
|
||||
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
|
||||
if (blk_queue_rq_alloc_time(rq->q))
|
||||
rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns;
|
||||
rq->alloc_time_ns = alloc_time_ns;
|
||||
else
|
||||
rq->alloc_time_ns = 0;
|
||||
#endif
|
||||
@ -359,8 +419,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
|
||||
if (data->flags & BLK_MQ_REQ_PM)
|
||||
data->rq_flags |= RQF_PM;
|
||||
if (blk_queue_io_stat(q))
|
||||
data->rq_flags |= RQF_IO_STAT;
|
||||
rq->rq_flags = data->rq_flags;
|
||||
|
||||
if (data->rq_flags & RQF_SCHED_TAGS) {
|
||||
@ -420,7 +478,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
|
||||
prefetch(tags->static_rqs[tag]);
|
||||
tag_mask &= ~(1UL << i);
|
||||
rq = blk_mq_rq_ctx_init(data, tags, tag);
|
||||
rq_list_add(data->cached_rq, rq);
|
||||
rq_list_add_head(data->cached_rqs, rq);
|
||||
nr++;
|
||||
}
|
||||
if (!(data->rq_flags & RQF_SCHED_TAGS))
|
||||
@ -429,7 +487,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
|
||||
percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
|
||||
data->nr_tags -= nr;
|
||||
|
||||
return rq_list_pop(data->cached_rq);
|
||||
return rq_list_pop(data->cached_rqs);
|
||||
}
|
||||
|
||||
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
||||
@ -526,7 +584,7 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
|
||||
.flags = flags,
|
||||
.cmd_flags = opf,
|
||||
.nr_tags = plug->nr_ios,
|
||||
.cached_rq = &plug->cached_rq,
|
||||
.cached_rqs = &plug->cached_rqs,
|
||||
};
|
||||
struct request *rq;
|
||||
|
||||
@ -551,14 +609,14 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
|
||||
if (!plug)
|
||||
return NULL;
|
||||
|
||||
if (rq_list_empty(plug->cached_rq)) {
|
||||
if (rq_list_empty(&plug->cached_rqs)) {
|
||||
if (plug->nr_ios == 1)
|
||||
return NULL;
|
||||
rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
|
||||
if (!rq)
|
||||
return NULL;
|
||||
} else {
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
rq = rq_list_peek(&plug->cached_rqs);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
|
||||
@ -567,8 +625,8 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
|
||||
if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
|
||||
return NULL;
|
||||
|
||||
plug->cached_rq = rq_list_next(rq);
|
||||
blk_mq_rq_time_init(rq, 0);
|
||||
rq_list_pop(&plug->cached_rqs);
|
||||
blk_mq_rq_time_init(rq, blk_time_get_ns());
|
||||
}
|
||||
|
||||
rq->cmd_flags = opf;
|
||||
@ -744,7 +802,7 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug)
|
||||
{
|
||||
struct request *rq;
|
||||
|
||||
while ((rq = rq_list_pop(&plug->cached_rq)) != NULL)
|
||||
while ((rq = rq_list_pop(&plug->cached_rqs)) != NULL)
|
||||
blk_mq_free_request(rq);
|
||||
}
|
||||
|
||||
@ -764,7 +822,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
|
||||
|
||||
static void blk_account_io_completion(struct request *req, unsigned int bytes)
|
||||
{
|
||||
if (req->part && blk_do_io_stat(req)) {
|
||||
if (req->rq_flags & RQF_IO_STAT) {
|
||||
const int sgrp = op_stat_group(req_op(req));
|
||||
|
||||
part_stat_lock();
|
||||
@ -784,7 +842,7 @@ static void blk_print_req_error(struct request *req, blk_status_t status)
|
||||
blk_op_str(req_op(req)),
|
||||
(__force u32)(req->cmd_flags & ~REQ_OP_MASK),
|
||||
req->nr_phys_segments,
|
||||
IOPRIO_PRIO_CLASS(req->ioprio));
|
||||
IOPRIO_PRIO_CLASS(req_get_ioprio(req)));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -982,8 +1040,7 @@ static inline void blk_account_io_done(struct request *req, u64 now)
|
||||
* normal IO on queueing nor completion. Accounting the
|
||||
* containing request is enough.
|
||||
*/
|
||||
if (blk_do_io_stat(req) && req->part &&
|
||||
!(req->rq_flags & RQF_FLUSH_SEQ)) {
|
||||
if ((req->rq_flags & (RQF_IO_STAT|RQF_FLUSH_SEQ)) == RQF_IO_STAT) {
|
||||
const int sgrp = op_stat_group(req_op(req));
|
||||
|
||||
part_stat_lock();
|
||||
@ -996,28 +1053,63 @@ static inline void blk_account_io_done(struct request *req, u64 now)
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool blk_rq_passthrough_stats(struct request *req)
|
||||
{
|
||||
struct bio *bio = req->bio;
|
||||
|
||||
if (!blk_queue_passthrough_stat(req->q))
|
||||
return false;
|
||||
|
||||
/* Requests without a bio do not transfer data. */
|
||||
if (!bio)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Stats are accumulated in the bdev, so must have one attached to a
|
||||
* bio to track stats. Most drivers do not set the bdev for passthrough
|
||||
* requests, but nvme is one that will set it.
|
||||
*/
|
||||
if (!bio->bi_bdev)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* We don't know what a passthrough command does, but we know the
|
||||
* payload size and data direction. Ensuring the size is aligned to the
|
||||
* block size filters out most commands with payloads that don't
|
||||
* represent sector access.
|
||||
*/
|
||||
if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void blk_account_io_start(struct request *req)
|
||||
{
|
||||
trace_block_io_start(req);
|
||||
|
||||
if (blk_do_io_stat(req)) {
|
||||
/*
|
||||
* All non-passthrough requests are created from a bio with one
|
||||
* exception: when a flush command that is part of a flush sequence
|
||||
* generated by the state machine in blk-flush.c is cloned onto the
|
||||
* lower device by dm-multipath we can get here without a bio.
|
||||
*/
|
||||
if (req->bio)
|
||||
req->part = req->bio->bi_bdev;
|
||||
else
|
||||
req->part = req->q->disk->part0;
|
||||
if (!blk_queue_io_stat(req->q))
|
||||
return;
|
||||
if (blk_rq_is_passthrough(req) && !blk_rq_passthrough_stats(req))
|
||||
return;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, false);
|
||||
part_stat_local_inc(req->part,
|
||||
in_flight[op_is_write(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
req->rq_flags |= RQF_IO_STAT;
|
||||
req->start_time_ns = blk_time_get_ns();
|
||||
|
||||
/*
|
||||
* All non-passthrough requests are created from a bio with one
|
||||
* exception: when a flush command that is part of a flush sequence
|
||||
* generated by the state machine in blk-flush.c is cloned onto the
|
||||
* lower device by dm-multipath we can get here without a bio.
|
||||
*/
|
||||
if (req->bio)
|
||||
req->part = req->bio->bi_bdev;
|
||||
else
|
||||
req->part = req->q->disk->part0;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(req->part, jiffies, false);
|
||||
part_stat_local_inc(req->part, in_flight[op_is_write(req_op(req))]);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
|
||||
@ -1300,8 +1392,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
*/
|
||||
if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
|
||||
plug->has_elevator = true;
|
||||
rq->rq_next = NULL;
|
||||
rq_list_add(&plug->mq_list, rq);
|
||||
rq_list_add_tail(&plug->mq_list, rq);
|
||||
plug->rq_count++;
|
||||
}
|
||||
|
||||
@ -1698,7 +1789,6 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
|
||||
|
||||
sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
|
||||
|
||||
struct dispatch_rq_data {
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
@ -2200,6 +2290,24 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
|
||||
|
||||
static inline bool blk_mq_hw_queue_need_run(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
bool need_run;
|
||||
|
||||
/*
|
||||
* When queue is quiesced, we may be switching io scheduler, or
|
||||
* updating nr_hw_queues, or other things, and we can't run queue
|
||||
* any more, even blk_mq_hctx_has_pending() can't be called safely.
|
||||
*
|
||||
* And queue will be rerun in blk_mq_unquiesce_queue() if it is
|
||||
* quiesced.
|
||||
*/
|
||||
__blk_mq_run_dispatch_ops(hctx->queue, false,
|
||||
need_run = !blk_queue_quiesced(hctx->queue) &&
|
||||
blk_mq_hctx_has_pending(hctx));
|
||||
return need_run;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_run_hw_queue - Start to run a hardware queue.
|
||||
* @hctx: Pointer to the hardware queue to run.
|
||||
@ -2220,20 +2328,23 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
|
||||
might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
|
||||
|
||||
/*
|
||||
* When queue is quiesced, we may be switching io scheduler, or
|
||||
* updating nr_hw_queues, or other things, and we can't run queue
|
||||
* any more, even __blk_mq_hctx_has_pending() can't be called safely.
|
||||
*
|
||||
* And queue will be rerun in blk_mq_unquiesce_queue() if it is
|
||||
* quiesced.
|
||||
*/
|
||||
__blk_mq_run_dispatch_ops(hctx->queue, false,
|
||||
need_run = !blk_queue_quiesced(hctx->queue) &&
|
||||
blk_mq_hctx_has_pending(hctx));
|
||||
need_run = blk_mq_hw_queue_need_run(hctx);
|
||||
if (!need_run) {
|
||||
unsigned long flags;
|
||||
|
||||
if (!need_run)
|
||||
return;
|
||||
/*
|
||||
* Synchronize with blk_mq_unquiesce_queue(), because we check
|
||||
* if hw queue is quiesced locklessly above, we need the use
|
||||
* ->queue_lock to make sure we see the up-to-date status to
|
||||
* not miss rerunning the hw queue.
|
||||
*/
|
||||
spin_lock_irqsave(&hctx->queue->queue_lock, flags);
|
||||
need_run = blk_mq_hw_queue_need_run(hctx);
|
||||
spin_unlock_irqrestore(&hctx->queue->queue_lock, flags);
|
||||
|
||||
if (!need_run)
|
||||
return;
|
||||
}
|
||||
|
||||
if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
|
||||
blk_mq_delay_run_hw_queue(hctx, 0);
|
||||
@ -2390,6 +2501,12 @@ void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
return;
|
||||
|
||||
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
|
||||
/*
|
||||
* Pairs with the smp_mb() in blk_mq_hctx_stopped() to order the
|
||||
* clearing of BLK_MQ_S_STOPPED above and the checking of dispatch
|
||||
* list in the subsequent routine.
|
||||
*/
|
||||
smp_mb__after_atomic();
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
|
||||
@ -2542,7 +2659,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
|
||||
rq->cmd_flags |= REQ_FAILFAST_MASK;
|
||||
|
||||
rq->__sector = bio->bi_iter.bi_sector;
|
||||
rq->write_hint = bio->bi_write_hint;
|
||||
blk_rq_bio_prep(rq, bio, nr_segs);
|
||||
if (bio_integrity(bio))
|
||||
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
|
||||
@ -2620,6 +2736,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2650,6 +2767,7 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
|
||||
|
||||
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
@ -2666,7 +2784,7 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug)
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
while ((rq = rq_list_pop(&plug->mq_list))) {
|
||||
bool last = rq_list_empty(plug->mq_list);
|
||||
bool last = rq_list_empty(&plug->mq_list);
|
||||
|
||||
if (hctx != rq->mq_hctx) {
|
||||
if (hctx) {
|
||||
@ -2709,8 +2827,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
|
||||
{
|
||||
struct blk_mq_hw_ctx *this_hctx = NULL;
|
||||
struct blk_mq_ctx *this_ctx = NULL;
|
||||
struct request *requeue_list = NULL;
|
||||
struct request **requeue_lastp = &requeue_list;
|
||||
struct rq_list requeue_list = {};
|
||||
unsigned int depth = 0;
|
||||
bool is_passthrough = false;
|
||||
LIST_HEAD(list);
|
||||
@ -2724,12 +2841,12 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
|
||||
is_passthrough = blk_rq_is_passthrough(rq);
|
||||
} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx ||
|
||||
is_passthrough != blk_rq_is_passthrough(rq)) {
|
||||
rq_list_add_tail(&requeue_lastp, rq);
|
||||
rq_list_add_tail(&requeue_list, rq);
|
||||
continue;
|
||||
}
|
||||
list_add(&rq->queuelist, &list);
|
||||
list_add_tail(&rq->queuelist, &list);
|
||||
depth++;
|
||||
} while (!rq_list_empty(plug->mq_list));
|
||||
} while (!rq_list_empty(&plug->mq_list));
|
||||
|
||||
plug->mq_list = requeue_list;
|
||||
trace_block_unplug(this_hctx->queue, depth, !from_sched);
|
||||
@ -2784,19 +2901,19 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
if (q->mq_ops->queue_rqs) {
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
__blk_mq_flush_plug_list(q, plug));
|
||||
if (rq_list_empty(plug->mq_list))
|
||||
if (rq_list_empty(&plug->mq_list))
|
||||
return;
|
||||
}
|
||||
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
blk_mq_plug_issue_direct(plug));
|
||||
if (rq_list_empty(plug->mq_list))
|
||||
if (rq_list_empty(&plug->mq_list))
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
blk_mq_dispatch_plug_list(plug, from_schedule);
|
||||
} while (!rq_list_empty(plug->mq_list));
|
||||
} while (!rq_list_empty(&plug->mq_list));
|
||||
}
|
||||
|
||||
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
@ -2861,7 +2978,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
|
||||
if (plug) {
|
||||
data.nr_tags = plug->nr_ios;
|
||||
plug->nr_ios = 1;
|
||||
data.cached_rq = &plug->cached_rq;
|
||||
data.cached_rqs = &plug->cached_rqs;
|
||||
}
|
||||
|
||||
rq = __blk_mq_alloc_requests(&data);
|
||||
@ -2884,7 +3001,7 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
|
||||
|
||||
if (!plug)
|
||||
return NULL;
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
rq = rq_list_peek(&plug->cached_rqs);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
if (type != rq->mq_hctx->type &&
|
||||
@ -2898,17 +3015,17 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
|
||||
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
struct bio *bio)
|
||||
{
|
||||
WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
|
||||
if (rq_list_pop(&plug->cached_rqs) != rq)
|
||||
WARN_ON_ONCE(1);
|
||||
|
||||
/*
|
||||
* If any qos ->throttle() end up blocking, we will have flushed the
|
||||
* plug and hence killed the cached_rq list as well. Pop this entry
|
||||
* before we throttle.
|
||||
*/
|
||||
plug->cached_rq = rq_list_next(rq);
|
||||
rq_qos_throttle(rq->q, bio);
|
||||
|
||||
blk_mq_rq_time_init(rq, 0);
|
||||
blk_mq_rq_time_init(rq, blk_time_get_ns());
|
||||
rq->cmd_flags = bio->bi_opf;
|
||||
INIT_LIST_HEAD(&rq->queuelist);
|
||||
}
|
||||
@ -3187,8 +3304,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
|
||||
rq->special_vec = rq_src->special_vec;
|
||||
}
|
||||
rq->nr_phys_segments = rq_src->nr_phys_segments;
|
||||
rq->ioprio = rq_src->ioprio;
|
||||
rq->write_hint = rq_src->write_hint;
|
||||
|
||||
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
|
||||
goto free_and_out;
|
||||
|
@ -155,7 +155,7 @@ struct blk_mq_alloc_data {
|
||||
|
||||
/* allocate multiple requests/tags in one go */
|
||||
unsigned int nr_tags;
|
||||
struct request **cached_rq;
|
||||
struct rq_list *cached_rqs;
|
||||
|
||||
/* input & output parameter */
|
||||
struct blk_mq_ctx *ctx;
|
||||
@ -230,6 +230,19 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
|
||||
|
||||
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
/* Fast path: hardware queue is not stopped most of the time. */
|
||||
if (likely(!test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* This barrier is used to order adding of dispatch list before and
|
||||
* the test of BLK_MQ_S_STOPPED below. Pairs with the memory barrier
|
||||
* in blk_mq_start_stopped_hw_queue() so that dispatch code could
|
||||
* either see BLK_MQ_S_STOPPED is cleared or dispatch list is not
|
||||
* empty to avoid missing dispatching requests.
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
|
||||
}
|
||||
|
||||
|
@ -218,7 +218,6 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
|
||||
return -1;
|
||||
|
||||
data->got_token = true;
|
||||
smp_wmb();
|
||||
wake_up_process(data->task);
|
||||
list_del_init_careful(&curr->entry);
|
||||
return 1;
|
||||
@ -274,10 +273,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
|
||||
* which means we now have two. Put our local token
|
||||
* and wake anyone else potentially waiting for one.
|
||||
*/
|
||||
smp_rmb();
|
||||
if (data.got_token)
|
||||
cleanup_cb(rqw, private_data);
|
||||
break;
|
||||
return;
|
||||
}
|
||||
io_schedule();
|
||||
has_sleeper = true;
|
||||
|
@ -50,7 +50,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
lim->max_sectors = UINT_MAX;
|
||||
lim->max_dev_sectors = UINT_MAX;
|
||||
lim->max_write_zeroes_sectors = UINT_MAX;
|
||||
lim->max_zone_append_sectors = UINT_MAX;
|
||||
lim->max_hw_zone_append_sectors = UINT_MAX;
|
||||
lim->max_user_discard_sectors = UINT_MAX;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_stacking_limits);
|
||||
@ -91,17 +91,16 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
if (lim->zone_write_granularity < lim->logical_block_size)
|
||||
lim->zone_write_granularity = lim->logical_block_size;
|
||||
|
||||
if (lim->max_zone_append_sectors) {
|
||||
/*
|
||||
* The Zone Append size is limited by the maximum I/O size
|
||||
* and the zone size given that it can't span zones.
|
||||
*/
|
||||
lim->max_zone_append_sectors =
|
||||
min3(lim->max_hw_sectors,
|
||||
lim->max_zone_append_sectors,
|
||||
lim->chunk_sectors);
|
||||
}
|
||||
|
||||
/*
|
||||
* The Zone Append size is limited by the maximum I/O size and the zone
|
||||
* size given that it can't span zones.
|
||||
*
|
||||
* If no max_hw_zone_append_sectors limit is provided, the block layer
|
||||
* will emulated it, else we're also bound by the hardware limit.
|
||||
*/
|
||||
lim->max_zone_append_sectors =
|
||||
min_not_zero(lim->max_hw_zone_append_sectors,
|
||||
min(lim->chunk_sectors, lim->max_hw_sectors));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -223,7 +222,7 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
|
||||
* Check that the limits in lim are valid, initialize defaults for unset
|
||||
* values, and cap values based on others where needed.
|
||||
*/
|
||||
static int blk_validate_limits(struct queue_limits *lim)
|
||||
int blk_validate_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int max_hw_sectors;
|
||||
unsigned int logical_block_sectors;
|
||||
@ -366,6 +365,7 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
return err;
|
||||
return blk_validate_zoned_limits(lim);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_validate_limits);
|
||||
|
||||
/*
|
||||
* Set the default limits for a newly allocated queue. @lim contains the
|
||||
@ -508,10 +508,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
t->features |= (b->features & BLK_FEAT_INHERIT_MASK);
|
||||
|
||||
/*
|
||||
* BLK_FEAT_NOWAIT and BLK_FEAT_POLL need to be supported both by the
|
||||
* stacking driver and all underlying devices. The stacking driver sets
|
||||
* the flags before stacking the limits, and this will clear the flags
|
||||
* if any of the underlying devices does not support it.
|
||||
* Some feaures need to be supported both by the stacking driver and all
|
||||
* underlying devices. The stacking driver sets these flags before
|
||||
* stacking the limits, and this will clear the flags if any of the
|
||||
* underlying devices does not support it.
|
||||
*/
|
||||
if (!(b->features & BLK_FEAT_NOWAIT))
|
||||
t->features &= ~BLK_FEAT_NOWAIT;
|
||||
@ -527,8 +527,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
|
||||
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
|
||||
b->max_write_zeroes_sectors);
|
||||
t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t),
|
||||
queue_limits_max_zone_append_sectors(b));
|
||||
t->max_hw_zone_append_sectors = min(t->max_hw_zone_append_sectors,
|
||||
b->max_hw_zone_append_sectors);
|
||||
|
||||
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
|
||||
b->seg_boundary_mask);
|
||||
@ -661,7 +661,7 @@ EXPORT_SYMBOL(blk_stack_limits);
|
||||
void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
|
||||
sector_t offset, const char *pfx)
|
||||
{
|
||||
if (blk_stack_limits(t, &bdev_get_queue(bdev)->limits,
|
||||
if (blk_stack_limits(t, bdev_limits(bdev),
|
||||
get_start_sect(bdev) + offset))
|
||||
pr_notice("%s: Warning: Device %pg is misaligned\n",
|
||||
pfx, bdev);
|
||||
|
@ -23,14 +23,14 @@
|
||||
struct queue_sysfs_entry {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct gendisk *disk, char *page);
|
||||
int (*load_module)(struct gendisk *disk, const char *page, size_t count);
|
||||
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
|
||||
void (*load_module)(struct gendisk *disk, const char *page, size_t count);
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
queue_var_show(unsigned long var, char *page)
|
||||
{
|
||||
return sprintf(page, "%lu\n", var);
|
||||
return sysfs_emit(page, "%lu\n", var);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
@ -121,7 +121,7 @@ QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)
|
||||
#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \
|
||||
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%llu\n", \
|
||||
return sysfs_emit(page, "%llu\n", \
|
||||
(unsigned long long)disk->queue->limits._field << \
|
||||
SECTOR_SHIFT); \
|
||||
}
|
||||
@ -131,6 +131,7 @@ QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)
|
||||
|
||||
#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \
|
||||
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
|
||||
@ -144,7 +145,7 @@ QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
|
||||
#define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
|
||||
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%d\n", _val); \
|
||||
return sysfs_emit(page, "%d\n", _val); \
|
||||
}
|
||||
|
||||
/* deprecated fields */
|
||||
@ -178,18 +179,6 @@ static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* For zone append queue_max_zone_append_sectors does not just return the
|
||||
* underlying queue limits, but actually contains a calculation. Because of
|
||||
* that we can't simply use QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES here.
|
||||
*/
|
||||
static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return sprintf(page, "%llu\n",
|
||||
(u64)queue_max_zone_append_sectors(disk->queue) <<
|
||||
SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
|
||||
{
|
||||
@ -235,7 +224,7 @@ static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
|
||||
#define QUEUE_SYSFS_FEATURE(_name, _feature) \
|
||||
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%u\n", \
|
||||
return sysfs_emit(page, "%u\n", \
|
||||
!!(disk->queue->limits.features & _feature)); \
|
||||
} \
|
||||
static ssize_t queue_##_name##_store(struct gendisk *disk, \
|
||||
@ -252,7 +241,7 @@ QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
|
||||
#define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \
|
||||
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%u\n", \
|
||||
return sysfs_emit(page, "%u\n", \
|
||||
!!(disk->queue->limits.features & _feature)); \
|
||||
}
|
||||
|
||||
@ -263,8 +252,8 @@ QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
|
||||
static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
if (blk_queue_is_zoned(disk->queue))
|
||||
return sprintf(page, "host-managed\n");
|
||||
return sprintf(page, "none\n");
|
||||
return sysfs_emit(page, "host-managed\n");
|
||||
return sysfs_emit(page, "none\n");
|
||||
}
|
||||
|
||||
static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
|
||||
@ -272,6 +261,34 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
|
||||
return queue_var_show(disk_nr_zones(disk), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(blk_queue_passthrough_stat(disk->queue), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_iostats_passthrough_store(struct gendisk *disk,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
unsigned long ios;
|
||||
ssize_t ret;
|
||||
|
||||
ret = queue_var_store(&ios, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
if (ios)
|
||||
lim.flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
|
||||
else
|
||||
lim.flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
|
||||
|
||||
ret = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return count;
|
||||
}
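As a usage sketch, the new attribute can be toggled from userspace like any other queue attribute. The program below writes "1" to it; the disk name in the path is only an example and must be adjusted for the actual device.

/* Hypothetical usage sketch; the device name is an example. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/block/nvme0n1/queue/iostats_passthrough";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* "1" enables iostats accounting for passthrough I/O, "0" disables it. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}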
|
||||
static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
|
||||
@ -349,7 +366,7 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
|
||||
|
||||
static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return sprintf(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
|
||||
return sysfs_emit(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
|
||||
}
|
||||
|
||||
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
|
||||
@ -370,8 +387,8 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
|
||||
static ssize_t queue_wc_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
if (blk_queue_write_cache(disk->queue))
|
||||
return sprintf(page, "write back\n");
|
||||
return sprintf(page, "write through\n");
|
||||
return sysfs_emit(page, "write back\n");
|
||||
return sysfs_emit(page, "write through\n");
|
||||
}
|
||||
|
||||
static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
|
||||
@ -451,7 +468,7 @@ QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
|
||||
|
||||
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
|
||||
|
||||
QUEUE_RO_ENTRY(queue_zoned, "zoned");
|
||||
@ -460,6 +477,7 @@ QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
|
||||
QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
|
||||
|
||||
QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
|
||||
QUEUE_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
|
||||
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
|
||||
QUEUE_RW_ENTRY(queue_poll, "io_poll");
|
||||
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
|
||||
@ -501,9 +519,9 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
|
||||
return -EINVAL;
|
||||
|
||||
if (wbt_disabled(disk->queue))
|
||||
return sprintf(page, "0\n");
|
||||
return sysfs_emit(page, "0\n");
|
||||
|
||||
return sprintf(page, "%llu\n",
|
||||
return sysfs_emit(page, "%llu\n",
|
||||
div_u64(wbt_get_min_lat(disk->queue), 1000));
|
||||
}
|
||||
|
||||
@ -578,7 +596,7 @@ static struct attribute *queue_attrs[] = {
|
||||
&queue_atomic_write_unit_max_entry.attr,
|
||||
&queue_write_same_max_entry.attr,
|
||||
&queue_max_write_zeroes_sectors_entry.attr,
|
||||
&queue_zone_append_max_entry.attr,
|
||||
&queue_max_zone_append_sectors_entry.attr,
|
||||
&queue_zone_write_granularity_entry.attr,
|
||||
&queue_rotational_entry.attr,
|
||||
&queue_zoned_entry.attr,
|
||||
@ -586,6 +604,7 @@ static struct attribute *queue_attrs[] = {
|
||||
&queue_max_open_zones_entry.attr,
|
||||
&queue_max_active_zones_entry.attr,
|
||||
&queue_nomerges_entry.attr,
|
||||
&queue_iostats_passthrough_entry.attr,
|
||||
&queue_iostats_entry.attr,
|
||||
&queue_stable_writes_entry.attr,
|
||||
&queue_add_random_entry.attr,
|
||||
@ -684,11 +703,8 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
* queue to ensure that the module file can be read when the request
|
||||
* queue is the one for the device storing the module file.
|
||||
*/
|
||||
if (entry->load_module) {
|
||||
res = entry->load_module(disk, page, length);
|
||||
if (res)
|
||||
return res;
|
||||
}
|
||||
if (entry->load_module)
|
||||
entry->load_module(disk, page, length);
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
|
@ -1485,13 +1485,13 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
|
||||
goto out_finish;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!strcmp(tok, "rbps") && val > 1)
|
||||
if (!strcmp(tok, "rbps"))
|
||||
v[0] = val;
|
||||
else if (!strcmp(tok, "wbps") && val > 1)
|
||||
else if (!strcmp(tok, "wbps"))
|
||||
v[1] = val;
|
||||
else if (!strcmp(tok, "riops") && val > 1)
|
||||
else if (!strcmp(tok, "riops"))
|
||||
v[2] = min_t(u64, val, UINT_MAX);
|
||||
else if (!strcmp(tok, "wiops") && val > 1)
|
||||
else if (!strcmp(tok, "wiops"))
|
||||
v[3] = min_t(u64, val, UINT_MAX);
|
||||
else
|
||||
goto out_finish;
|
||||
@ -1526,6 +1526,42 @@ static void throtl_shutdown_wq(struct request_queue *q)
|
||||
cancel_work_sync(&td->dispatch_work);
|
||||
}
|
||||
|
||||
static void tg_flush_bios(struct throtl_grp *tg)
|
||||
{
|
||||
struct throtl_service_queue *sq = &tg->service_queue;
|
||||
|
||||
if (tg->flags & THROTL_TG_CANCELING)
|
||||
return;
|
||||
/*
|
||||
* Set the flag to make sure throtl_pending_timer_fn() won't
|
||||
* stop until all throttled bios are dispatched.
|
||||
*/
|
||||
tg->flags |= THROTL_TG_CANCELING;
|
||||
|
||||
/*
|
||||
* Do not dispatch cgroup without THROTL_TG_PENDING or cgroup
|
||||
* will be inserted to service queue without THROTL_TG_PENDING
|
||||
* set in tg_update_disptime below. Then IO dispatched from
|
||||
* child in tg_dispatch_one_bio will trigger double insertion
|
||||
* and corrupt the tree.
|
||||
*/
|
||||
if (!(tg->flags & THROTL_TG_PENDING))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Update disptime after setting the above flag to make sure
|
||||
* throtl_select_dispatch() won't exit without dispatching.
|
||||
*/
|
||||
tg_update_disptime(tg);
|
||||
|
||||
throtl_schedule_pending_timer(sq, jiffies + 1);
|
||||
}
|
||||
|
||||
static void throtl_pd_offline(struct blkg_policy_data *pd)
|
||||
{
|
||||
tg_flush_bios(pd_to_tg(pd));
|
||||
}
|
||||
|
||||
struct blkcg_policy blkcg_policy_throtl = {
|
||||
.dfl_cftypes = throtl_files,
|
||||
.legacy_cftypes = throtl_legacy_files,
|
||||
@ -1533,6 +1569,7 @@ struct blkcg_policy blkcg_policy_throtl = {
|
||||
.pd_alloc_fn = throtl_pd_alloc,
|
||||
.pd_init_fn = throtl_pd_init,
|
||||
.pd_online_fn = throtl_pd_online,
|
||||
.pd_offline_fn = throtl_pd_offline,
|
||||
.pd_free_fn = throtl_pd_free,
|
||||
};
|
||||
|
||||
@ -1553,32 +1590,15 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
|
||||
*/
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg);
|
||||
struct throtl_service_queue *sq = &tg->service_queue;
|
||||
|
||||
/*
|
||||
* Set the flag to make sure throtl_pending_timer_fn() won't
|
||||
* stop until all throttled bios are dispatched.
|
||||
* disk_release will call pd_offline_fn to cancel bios.
|
||||
* However, disk_release can't be called if someone gets
* a reference to the device and issues bios that are
* inflight after del_gendisk.
|
||||
* Cancel bios here to ensure no bios are inflight after
|
||||
* del_gendisk.
|
||||
*/
|
||||
tg->flags |= THROTL_TG_CANCELING;
|
||||
|
||||
/*
|
||||
* Do not dispatch cgroup without THROTL_TG_PENDING or cgroup
|
||||
* will be inserted to service queue without THROTL_TG_PENDING
|
||||
* set in tg_update_disptime below. Then IO dispatched from
|
||||
* child in tg_dispatch_one_bio will trigger double insertion
|
||||
* and corrupt the tree.
|
||||
*/
|
||||
if (!(tg->flags & THROTL_TG_PENDING))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Update disptime after setting the above flag to make sure
|
||||
* throtl_select_dispatch() won't exit without dispatching.
|
||||
*/
|
||||
tg_update_disptime(tg);
|
||||
|
||||
throtl_schedule_pending_timer(sq, jiffies + 1);
|
||||
tg_flush_bios(blkg_to_tg(blkg));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/mempool.h>
|
||||
|
||||
#include "blk.h"
|
||||
@ -64,7 +64,7 @@ static const char *const zone_cond_name[] = {
|
||||
struct blk_zone_wplug {
|
||||
struct hlist_node node;
|
||||
struct list_head link;
|
||||
atomic_t ref;
|
||||
refcount_t ref;
|
||||
spinlock_t lock;
|
||||
unsigned int flags;
|
||||
unsigned int zone_no;
|
||||
@ -348,13 +348,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector)
|
||||
{
|
||||
if (!disk->conv_zones_bitmap)
|
||||
return false;
|
||||
return test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
|
||||
}
|
||||
|
||||
static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
|
||||
{
|
||||
return zone->start + zone->len >= get_capacity(disk);
|
||||
@ -411,7 +404,7 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
|
||||
|
||||
hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) {
|
||||
if (zwplug->zone_no == zno &&
|
||||
atomic_inc_not_zero(&zwplug->ref)) {
|
||||
refcount_inc_not_zero(&zwplug->ref)) {
|
||||
rcu_read_unlock();
|
||||
return zwplug;
|
||||
}
|
||||
@ -432,7 +425,7 @@ static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
|
||||
|
||||
static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
if (atomic_dec_and_test(&zwplug->ref)) {
|
||||
if (refcount_dec_and_test(&zwplug->ref)) {
|
||||
WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
|
||||
WARN_ON_ONCE(!list_empty(&zwplug->link));
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));
|
||||
@ -463,7 +456,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
|
||||
* taken when the plug was allocated and another reference taken by the
|
||||
* caller context).
|
||||
*/
|
||||
if (atomic_read(&zwplug->ref) > 2)
|
||||
if (refcount_read(&zwplug->ref) > 2)
|
||||
return false;
|
||||
|
||||
/* We can remove zone write plugs for zones that are empty or full. */
|
||||
@ -533,7 +526,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
|
||||
|
||||
INIT_HLIST_NODE(&zwplug->node);
|
||||
INIT_LIST_HEAD(&zwplug->link);
|
||||
atomic_set(&zwplug->ref, 2);
|
||||
refcount_set(&zwplug->ref, 2);
|
||||
spin_lock_init(&zwplug->lock);
|
||||
zwplug->flags = 0;
|
||||
zwplug->zone_no = zno;
|
||||
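The hunks in this file follow one conversion pattern: the zone write plug reference count moves from atomic_t to refcount_t, which saturates and warns on overflow/underflow instead of silently wrapping. A sketch of the get/put pattern under that API is shown below; it is not a drop-in function, the names only mirror the surrounding code.

/* Kernel-style sketch of the conversion pattern used throughout this file. */
#include <linux/refcount.h>
#include <linux/types.h>

struct obj {
	refcount_t ref;
};

static void obj_init(struct obj *o)
{
	refcount_set(&o->ref, 1);               /* was atomic_set() */
}

static bool obj_tryget(struct obj *o)
{
	return refcount_inc_not_zero(&o->ref);  /* was atomic_inc_not_zero() */
}

static void obj_put(struct obj *o, void (*free_fn)(struct obj *))
{
	if (refcount_dec_and_test(&o->ref))     /* was atomic_dec_and_test() */
		free_fn(o);
}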
@ -624,7 +617,7 @@ static inline void disk_zone_wplug_set_error(struct gendisk *disk,
|
||||
* finished.
|
||||
*/
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
|
||||
atomic_inc(&zwplug->ref);
|
||||
refcount_inc(&zwplug->ref);
|
||||
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
|
||||
@ -709,7 +702,7 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
|
||||
struct blk_zone_wplug *zwplug;
|
||||
|
||||
/* Conventional zones cannot be reset nor finished. */
|
||||
if (disk_zone_is_conv(disk, sector)) {
|
||||
if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
@ -963,7 +956,7 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
}
|
||||
|
||||
/* Conventional zones do not need write plugging. */
|
||||
if (disk_zone_is_conv(disk, sector)) {
|
||||
if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
|
||||
/* Zone append to conventional zones is not allowed. */
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
bio_io_error(bio);
|
||||
@ -1099,7 +1092,7 @@ static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
|
||||
* reference we take here.
|
||||
*/
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
|
||||
atomic_inc(&zwplug->ref);
|
||||
refcount_inc(&zwplug->ref);
|
||||
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
|
||||
}
|
||||
|
||||
@ -1444,7 +1437,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
|
||||
while (!hlist_empty(&disk->zone_wplugs_hash[i])) {
|
||||
zwplug = hlist_entry(disk->zone_wplugs_hash[i].first,
|
||||
struct blk_zone_wplug, node);
|
||||
atomic_inc(&zwplug->ref);
|
||||
refcount_inc(&zwplug->ref);
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
@ -1455,6 +1448,24 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
|
||||
disk->zone_wplugs_hash_bits = 0;
|
||||
}
|
||||
|
||||
static unsigned int disk_set_conv_zones_bitmap(struct gendisk *disk,
|
||||
unsigned long *bitmap)
|
||||
{
|
||||
unsigned int nr_conv_zones = 0;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
if (bitmap)
|
||||
nr_conv_zones = bitmap_weight(bitmap, disk->nr_zones);
|
||||
bitmap = rcu_replace_pointer(disk->conv_zones_bitmap, bitmap,
|
||||
lockdep_is_held(&disk->zone_wplugs_lock));
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
|
||||
kfree_rcu_mightsleep(bitmap);
|
||||
|
||||
return nr_conv_zones;
|
||||
}
|
||||
|
||||
void disk_free_zone_resources(struct gendisk *disk)
|
||||
{
|
||||
if (!disk->zone_wplugs_pool)
|
||||
@ -1478,8 +1489,7 @@ void disk_free_zone_resources(struct gendisk *disk)
|
||||
mempool_destroy(disk->zone_wplugs_pool);
|
||||
disk->zone_wplugs_pool = NULL;
|
||||
|
||||
bitmap_free(disk->conv_zones_bitmap);
|
||||
disk->conv_zones_bitmap = NULL;
|
||||
disk_set_conv_zones_bitmap(disk, NULL);
|
||||
disk->zone_capacity = 0;
|
||||
disk->last_zone_capacity = 0;
|
||||
disk->nr_zones = 0;
|
||||
@ -1538,17 +1548,15 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||
struct blk_revalidate_zone_args *args)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned int nr_seq_zones, nr_conv_zones = 0;
|
||||
unsigned int nr_seq_zones, nr_conv_zones;
|
||||
unsigned int pool_size;
|
||||
struct queue_limits lim;
|
||||
|
||||
disk->nr_zones = args->nr_zones;
|
||||
disk->zone_capacity = args->zone_capacity;
|
||||
disk->last_zone_capacity = args->last_zone_capacity;
|
||||
swap(disk->conv_zones_bitmap, args->conv_zones_bitmap);
|
||||
if (disk->conv_zones_bitmap)
|
||||
nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap,
|
||||
disk->nr_zones);
|
||||
nr_conv_zones =
|
||||
disk_set_conv_zones_bitmap(disk, args->conv_zones_bitmap);
|
||||
if (nr_conv_zones >= disk->nr_zones) {
|
||||
pr_warn("%s: Invalid number of conventional zones %u / %u\n",
|
||||
disk->disk_name, nr_conv_zones, disk->nr_zones);
|
||||
@ -1774,12 +1782,6 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!queue_max_zone_append_sectors(q)) {
|
||||
pr_warn("%s: Invalid 0 maximum zone append limit\n",
|
||||
disk->disk_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that all memory allocations in this context are done as if
|
||||
* GFP_NOIO was specified.
|
||||
@ -1823,8 +1825,6 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
disk_free_zone_resources(disk);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
kfree(args.conv_zones_bitmap);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
|
||||
@ -1851,7 +1851,7 @@ int queue_zone_wplugs_show(void *data, struct seq_file *m)
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
zwp_zone_no = zwplug->zone_no;
|
||||
zwp_flags = zwplug->flags;
|
||||
zwp_ref = atomic_read(&zwplug->ref);
|
||||
zwp_ref = refcount_read(&zwplug->ref);
|
||||
zwp_wp_offset = zwplug->wp_offset;
|
||||
zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
|
block/blk.h (52 changed lines)
@ -4,6 +4,7 @@
|
||||
|
||||
#include <linux/bio-integrity.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/timekeeping.h>
|
||||
@ -34,9 +35,10 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
|
||||
gfp_t flags);
|
||||
void blk_free_flush_queue(struct blk_flush_queue *q);
|
||||
|
||||
void blk_freeze_queue(struct request_queue *q);
|
||||
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
|
||||
void blk_queue_start_drain(struct request_queue *q);
|
||||
bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
|
||||
bool blk_queue_start_drain(struct request_queue *q);
|
||||
bool __blk_freeze_queue_start(struct request_queue *q,
|
||||
struct task_struct *owner);
|
||||
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
|
||||
void submit_bio_noacct_nocheck(struct bio *bio);
|
||||
void bio_await_chain(struct bio *bio);
|
||||
@ -69,8 +71,11 @@ static inline int bio_queue_enter(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
|
||||
if (blk_try_enter_queue(q, false))
|
||||
if (blk_try_enter_queue(q, false)) {
|
||||
rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
|
||||
rwsem_release(&q->io_lockdep_map, _RET_IP_);
|
||||
return 0;
|
||||
}
|
||||
return __bio_queue_enter(q, bio);
|
||||
}
|
||||
|
||||
@ -405,17 +410,6 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
|
||||
struct queue_limits *lim);
|
||||
int blk_dev_init(void);
|
||||
|
||||
/*
|
||||
* Contribute to IO statistics IFF:
|
||||
*
|
||||
* a) it's attached to a gendisk, and
|
||||
* b) the queue had IO stats enabled when this request was started
|
||||
*/
|
||||
static inline bool blk_do_io_stat(struct request *rq)
|
||||
{
|
||||
return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq);
|
||||
}
|
||||
|
||||
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
|
||||
unsigned int part_in_flight(struct block_device *part);
|
||||
|
||||
@ -463,11 +457,6 @@ static inline bool bio_zone_write_plugging(struct bio *bio)
|
||||
{
|
||||
return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
|
||||
}
|
||||
static inline bool bio_is_zone_append(struct bio *bio)
|
||||
{
|
||||
return bio_op(bio) == REQ_OP_ZONE_APPEND ||
|
||||
bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
|
||||
}
|
||||
void blk_zone_write_plug_bio_merged(struct bio *bio);
|
||||
void blk_zone_write_plug_init_request(struct request *rq);
|
||||
static inline void blk_zone_update_request_bio(struct request *rq,
|
||||
@ -516,10 +505,6 @@ static inline bool bio_zone_write_plugging(struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool bio_is_zone_append(struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void blk_zone_write_plug_bio_merged(struct bio *bio)
|
||||
{
|
||||
}
|
||||
@ -558,6 +543,7 @@ void blk_free_ext_minor(unsigned int minor);
|
||||
#define ADDPART_FLAG_NONE 0
|
||||
#define ADDPART_FLAG_RAID 1
|
||||
#define ADDPART_FLAG_WHOLEDISK 2
|
||||
#define ADDPART_FLAG_READONLY 4
|
||||
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
|
||||
sector_t length);
|
||||
int bdev_del_partition(struct gendisk *disk, int partno);
|
||||
@ -734,4 +720,22 @@ void blk_integrity_verify(struct bio *bio);
|
||||
void blk_integrity_prepare(struct request *rq);
|
||||
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
|
||||
|
||||
static inline void blk_freeze_acquire_lock(struct request_queue *q, bool
|
||||
disk_dead, bool queue_dying)
|
||||
{
|
||||
if (!disk_dead)
|
||||
rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
|
||||
if (!queue_dying)
|
||||
rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
|
||||
}
|
||||
|
||||
static inline void blk_unfreeze_release_lock(struct request_queue *q, bool
|
||||
disk_dead, bool queue_dying)
|
||||
{
|
||||
if (!queue_dying)
|
||||
rwsem_release(&q->q_lockdep_map, _RET_IP_);
|
||||
if (!disk_dead)
|
||||
rwsem_release(&q->io_lockdep_map, _RET_IP_);
|
||||
}
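These two helpers only add lockdep annotations around queue freezing. A hedged sketch of the pairing is shown below, mirroring the elevator_init_mq() usage later in this diff, where the disk is not yet added (disk_dead = true) and the queue is not dying; the wrapper function name is illustrative.

/* Sketch only: annotate a freeze/unfreeze section that is started and
 * finished via the "non_owner" variants. */
static void my_reconfigure_queue(struct request_queue *q)
{
	blk_freeze_queue_start_non_owner(q);
	blk_freeze_acquire_lock(q, true, false);  /* disk not added, not dying */
	blk_mq_freeze_queue_wait(q);

	/* ... apply new settings while no I/O is in flight ... */

	blk_unfreeze_release_lock(q, true, false);
	blk_mq_unfreeze_queue_non_owner(q);
}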
|
||||
|
||||
#endif /* BLK_INTERNAL_H */
|
||||
|
@ -598,13 +598,19 @@ void elevator_init_mq(struct request_queue *q)
|
||||
* drain any dispatch activities originated from passthrough
|
||||
* requests, then no need to quiesce queue which may add long boot
|
||||
* latency, especially when lots of disks are involved.
|
||||
*
|
||||
* The disk isn't added yet, so the queue lock is only verified manually.
|
||||
*/
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_freeze_queue_start_non_owner(q);
|
||||
blk_freeze_acquire_lock(q, true, false);
|
||||
blk_mq_freeze_queue_wait(q);
|
||||
|
||||
blk_mq_cancel_work_sync(q);
|
||||
|
||||
err = blk_mq_init_sched(q, e);
|
||||
|
||||
blk_mq_unfreeze_queue(q);
|
||||
blk_unfreeze_release_lock(q, true, false);
|
||||
blk_mq_unfreeze_queue_non_owner(q);
|
||||
|
||||
if (err) {
|
||||
pr_warn("\"%s\" elevator initialization failed, "
|
||||
@ -704,15 +710,15 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int elv_iosched_load_module(struct gendisk *disk, const char *buf,
|
||||
size_t count)
|
||||
void elv_iosched_load_module(struct gendisk *disk, const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
char elevator_name[ELV_NAME_MAX];
|
||||
struct elevator_type *found;
|
||||
const char *name;
|
||||
|
||||
if (!elv_support_iosched(disk->queue))
|
||||
return -EOPNOTSUPP;
|
||||
return;
|
||||
|
||||
strscpy(elevator_name, buf, sizeof(elevator_name));
|
||||
name = strstrip(elevator_name);
|
||||
@ -723,8 +729,6 @@ int elv_iosched_load_module(struct gendisk *disk, const char *buf,
|
||||
|
||||
if (!found)
|
||||
request_module("%s-iosched", name);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
|
||||
|
@ -148,8 +148,8 @@ extern void elv_unregister(struct elevator_type *);
|
||||
* io scheduler sysfs switching
|
||||
*/
|
||||
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
|
||||
int elv_iosched_load_module(struct gendisk *disk, const char *page,
|
||||
size_t count);
|
||||
void elv_iosched_load_module(struct gendisk *disk, const char *page,
|
||||
size_t count);
|
||||
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
|
||||
|
||||
extern bool elv_bio_merge_ok(struct request *, struct bio *);
|
||||
|
block/genhd.c (136 changed lines)
@ -383,16 +383,18 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
|
||||
}
|
||||
|
||||
/**
|
||||
* device_add_disk - add disk information to kernel list
|
||||
* add_disk_fwnode - add disk information to kernel list with fwnode
|
||||
* @parent: parent device for the disk
|
||||
* @disk: per-device partitioning information
|
||||
* @groups: Additional per-device sysfs groups
|
||||
* @fwnode: attached disk fwnode
|
||||
*
|
||||
* This function registers the partitioning information in @disk
|
||||
* with the kernel.
|
||||
* with the kernel. Also attach a fwnode to the disk device.
|
||||
*/
|
||||
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups)
|
||||
int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups,
|
||||
struct fwnode_handle *fwnode)
|
||||
|
||||
{
|
||||
struct device *ddev = disk_to_dev(disk);
|
||||
@ -452,6 +454,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
ddev->parent = parent;
|
||||
ddev->groups = groups;
|
||||
dev_set_name(ddev, "%s", disk->disk_name);
|
||||
if (fwnode)
|
||||
device_set_node(ddev, fwnode);
|
||||
if (!(disk->flags & GENHD_FL_HIDDEN))
|
||||
ddev->devt = MKDEV(disk->major, disk->first_minor);
|
||||
ret = device_add(ddev);
|
||||
@ -553,6 +557,22 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
elevator_exit(disk->queue);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(add_disk_fwnode);
|
||||
|
||||
/**
|
||||
* device_add_disk - add disk information to kernel list
|
||||
* @parent: parent device for the disk
|
||||
* @disk: per-device partitioning information
|
||||
* @groups: Additional per-device sysfs groups
|
||||
*
|
||||
* This function registers the partitioning information in @disk
|
||||
* with the kernel.
|
||||
*/
|
||||
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups)
|
||||
{
|
||||
return add_disk_fwnode(parent, disk, groups, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(device_add_disk);
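A driver that has a firmware node for the disk can now hand it to add_disk_fwnode() instead of calling device_set_node() after registration. A hedged sketch follows; the probe function and the my_alloc_disk()/my_free_disk() helpers are illustrative, only add_disk_fwnode() and dev_fwnode() are assumed to exist as shown.

/* Hypothetical driver probe path. */
static int my_probe(struct platform_device *pdev)
{
	struct gendisk *disk = my_alloc_disk(pdev);   /* driver-specific */
	int err;

	if (IS_ERR(disk))
		return PTR_ERR(disk);

	err = add_disk_fwnode(&pdev->dev, disk, NULL, dev_fwnode(&pdev->dev));
	if (err)
		my_free_disk(disk);                   /* driver-specific */
	return err;
}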
|
||||
|
||||
static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
|
||||
@ -581,13 +601,13 @@ static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void __blk_mark_disk_dead(struct gendisk *disk)
|
||||
static bool __blk_mark_disk_dead(struct gendisk *disk)
|
||||
{
|
||||
/*
|
||||
* Fail any new I/O.
|
||||
*/
|
||||
if (test_and_set_bit(GD_DEAD, &disk->state))
|
||||
return;
|
||||
return false;
|
||||
|
||||
if (test_bit(GD_OWNS_QUEUE, &disk->state))
|
||||
blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);
|
||||
@ -600,7 +620,7 @@ static void __blk_mark_disk_dead(struct gendisk *disk)
|
||||
/*
|
||||
* Prevent new I/O from crossing bio_queue_enter().
|
||||
*/
|
||||
blk_queue_start_drain(disk->queue);
|
||||
return blk_queue_start_drain(disk->queue);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -641,6 +661,7 @@ void del_gendisk(struct gendisk *disk)
|
||||
struct request_queue *q = disk->queue;
|
||||
struct block_device *part;
|
||||
unsigned long idx;
|
||||
bool start_drain, queue_dying;
|
||||
|
||||
might_sleep();
|
||||
|
||||
@ -668,7 +689,10 @@ void del_gendisk(struct gendisk *disk)
|
||||
* Drop all partitions now that the disk is marked dead.
|
||||
*/
|
||||
mutex_lock(&disk->open_mutex);
|
||||
__blk_mark_disk_dead(disk);
|
||||
start_drain = __blk_mark_disk_dead(disk);
|
||||
queue_dying = blk_queue_dying(q);
|
||||
if (start_drain)
|
||||
blk_freeze_acquire_lock(q, true, queue_dying);
|
||||
xa_for_each_start(&disk->part_tbl, idx, part, 1)
|
||||
drop_partition(part);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
@ -725,6 +749,9 @@ void del_gendisk(struct gendisk *disk)
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_exit_queue(q);
|
||||
}
|
||||
|
||||
if (start_drain)
|
||||
blk_unfreeze_release_lock(q, true, queue_dying);
|
||||
}
|
||||
EXPORT_SYMBOL(del_gendisk);
|
||||
|
||||
@ -756,7 +783,7 @@ static ssize_t disk_badblocks_show(struct device *dev,
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
if (!disk->bb)
|
||||
return sprintf(page, "\n");
|
||||
return sysfs_emit(page, "\n");
|
||||
|
||||
return badblocks_show(disk->bb, page, 0);
|
||||
}
|
||||
@ -904,7 +931,7 @@ static ssize_t disk_range_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", disk->minors);
|
||||
return sysfs_emit(buf, "%d\n", disk->minors);
|
||||
}
|
||||
|
||||
static ssize_t disk_ext_range_show(struct device *dev,
|
||||
@ -912,7 +939,7 @@ static ssize_t disk_ext_range_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n",
|
||||
return sysfs_emit(buf, "%d\n",
|
||||
(disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
|
||||
}
|
||||
|
||||
@ -921,7 +948,7 @@ static ssize_t disk_removable_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n",
|
||||
return sysfs_emit(buf, "%d\n",
|
||||
(disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
|
||||
}
|
||||
|
||||
@ -930,7 +957,7 @@ static ssize_t disk_hidden_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n",
|
||||
return sysfs_emit(buf, "%d\n",
|
||||
(disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
|
||||
}
|
||||
|
||||
@ -939,13 +966,13 @@ static ssize_t disk_ro_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
|
||||
return sysfs_emit(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
|
||||
}
|
||||
|
||||
ssize_t part_size_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
|
||||
return sysfs_emit(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
ssize_t part_stat_show(struct device *dev,
|
||||
@ -962,7 +989,7 @@ ssize_t part_stat_show(struct device *dev,
|
||||
part_stat_unlock();
|
||||
}
|
||||
part_stat_read_all(bdev, &stat);
|
||||
return sprintf(buf,
|
||||
return sysfs_emit(buf,
|
||||
"%8lu %8lu %8llu %8u "
|
||||
"%8lu %8lu %8llu %8u "
|
||||
"%8u %8u %8u "
|
||||
@ -1004,14 +1031,14 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
|
||||
else
|
||||
part_in_flight_rw(bdev, inflight);
|
||||
|
||||
return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
|
||||
return sysfs_emit(buf, "%8u %8u\n", inflight[0], inflight[1]);
|
||||
}
|
||||
|
||||
static ssize_t disk_capability_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
dev_warn_once(dev, "the capability attribute has been deprecated.\n");
|
||||
return sprintf(buf, "0\n");
|
||||
return sysfs_emit(buf, "0\n");
|
||||
}
|
||||
|
||||
static ssize_t disk_alignment_offset_show(struct device *dev,
|
||||
@ -1020,7 +1047,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
|
||||
return sysfs_emit(buf, "%d\n", bdev_alignment_offset(disk->part0));
|
||||
}
|
||||
|
||||
static ssize_t disk_discard_alignment_show(struct device *dev,
|
||||
@ -1029,7 +1056,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
|
||||
return sysfs_emit(buf, "%d\n", bdev_alignment_offset(disk->part0));
|
||||
}
|
||||
|
||||
static ssize_t diskseq_show(struct device *dev,
|
||||
@ -1037,13 +1064,13 @@ static ssize_t diskseq_show(struct device *dev,
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
return sprintf(buf, "%llu\n", disk->diskseq);
|
||||
return sysfs_emit(buf, "%llu\n", disk->diskseq);
|
||||
}
|
||||
|
||||
static ssize_t partscan_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
|
||||
return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
|
||||
@ -1065,7 +1092,7 @@ static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);
|
||||
ssize_t part_fail_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n",
|
||||
return sysfs_emit(buf, "%d\n",
|
||||
bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL));
|
||||
}
|
||||
|
||||
@ -1264,40 +1291,35 @@ static int diskstats_show(struct seq_file *seqf, void *v)
|
||||
part_stat_unlock();
|
||||
}
|
||||
part_stat_read_all(hd, &stat);
|
||||
seq_printf(seqf, "%4d %7d %pg "
|
||||
"%lu %lu %lu %u "
|
||||
"%lu %lu %lu %u "
|
||||
"%u %u %u "
|
||||
"%lu %lu %lu %u "
|
||||
"%lu %u"
|
||||
"\n",
|
||||
MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
|
||||
stat.ios[STAT_READ],
|
||||
stat.merges[STAT_READ],
|
||||
stat.sectors[STAT_READ],
|
||||
(unsigned int)div_u64(stat.nsecs[STAT_READ],
|
||||
NSEC_PER_MSEC),
|
||||
stat.ios[STAT_WRITE],
|
||||
stat.merges[STAT_WRITE],
|
||||
stat.sectors[STAT_WRITE],
|
||||
(unsigned int)div_u64(stat.nsecs[STAT_WRITE],
|
||||
NSEC_PER_MSEC),
|
||||
inflight,
|
||||
jiffies_to_msecs(stat.io_ticks),
|
||||
(unsigned int)div_u64(stat.nsecs[STAT_READ] +
|
||||
stat.nsecs[STAT_WRITE] +
|
||||
stat.nsecs[STAT_DISCARD] +
|
||||
stat.nsecs[STAT_FLUSH],
|
||||
NSEC_PER_MSEC),
|
||||
stat.ios[STAT_DISCARD],
|
||||
stat.merges[STAT_DISCARD],
|
||||
stat.sectors[STAT_DISCARD],
|
||||
(unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
|
||||
NSEC_PER_MSEC),
|
||||
stat.ios[STAT_FLUSH],
|
||||
(unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
|
||||
NSEC_PER_MSEC)
|
||||
);
|
||||
seq_put_decimal_ull_width(seqf, "", MAJOR(hd->bd_dev), 4);
|
||||
seq_put_decimal_ull_width(seqf, " ", MINOR(hd->bd_dev), 7);
|
||||
seq_printf(seqf, " %pg", hd);
|
||||
seq_put_decimal_ull(seqf, " ", stat.ios[STAT_READ]);
|
||||
seq_put_decimal_ull(seqf, " ", stat.merges[STAT_READ]);
|
||||
seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_READ]);
|
||||
seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ],
|
||||
NSEC_PER_MSEC));
|
||||
seq_put_decimal_ull(seqf, " ", stat.ios[STAT_WRITE]);
|
||||
seq_put_decimal_ull(seqf, " ", stat.merges[STAT_WRITE]);
|
||||
seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_WRITE]);
|
||||
seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
|
||||
NSEC_PER_MSEC));
|
||||
seq_put_decimal_ull(seqf, " ", inflight);
|
||||
seq_put_decimal_ull(seqf, " ", jiffies_to_msecs(stat.io_ticks));
|
||||
seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ] +
|
||||
stat.nsecs[STAT_WRITE] +
|
||||
stat.nsecs[STAT_DISCARD] +
|
||||
stat.nsecs[STAT_FLUSH],
|
||||
NSEC_PER_MSEC));
|
||||
seq_put_decimal_ull(seqf, " ", stat.ios[STAT_DISCARD]);
|
||||
seq_put_decimal_ull(seqf, " ", stat.merges[STAT_DISCARD]);
|
||||
seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_DISCARD]);
|
||||
seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
|
||||
NSEC_PER_MSEC));
|
||||
seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]);
|
||||
seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
|
||||
NSEC_PER_MSEC));
|
||||
seq_putc(seqf, '\n');
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
|
@ -270,4 +270,13 @@ config CMDLINE_PARTITION
|
||||
Say Y here if you want to read the partition table from bootargs.
|
||||
The format for the command line is just like mtdparts.
|
||||
|
||||
config OF_PARTITION
|
||||
bool "Device Tree partition support" if PARTITION_ADVANCED
|
||||
depends on OF
|
||||
help
|
||||
Say Y here if you want to enable support for partition tables
defined in the Device Tree (mainly for eMMC).
The format of the device tree node follows the MTD fixed-partition
schema.
|
||||
|
||||
endmenu
|
||||
|
@ -12,6 +12,7 @@ obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o
|
||||
obj-$(CONFIG_MAC_PARTITION) += mac.o
|
||||
obj-$(CONFIG_LDM_PARTITION) += ldm.o
|
||||
obj-$(CONFIG_MSDOS_PARTITION) += msdos.o
|
||||
obj-$(CONFIG_OF_PARTITION) += of.o
|
||||
obj-$(CONFIG_OSF_PARTITION) += osf.o
|
||||
obj-$(CONFIG_SGI_PARTITION) += sgi.o
|
||||
obj-$(CONFIG_SUN_PARTITION) += sun.o
|
||||
|
@ -62,6 +62,7 @@ int karma_partition(struct parsed_partitions *state);
|
||||
int ldm_partition(struct parsed_partitions *state);
|
||||
int mac_partition(struct parsed_partitions *state);
|
||||
int msdos_partition(struct parsed_partitions *state);
|
||||
int of_partition(struct parsed_partitions *state);
|
||||
int osf_partition(struct parsed_partitions *state);
|
||||
int sgi_partition(struct parsed_partitions *state);
|
||||
int sun_partition(struct parsed_partitions *state);
|
||||
|
@ -237,6 +237,9 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
|
||||
put_partition(state, slot, subpart->from >> 9,
|
||||
subpart->size >> 9);
|
||||
|
||||
if (subpart->flags & PF_RDONLY)
|
||||
state->parts[slot].flags |= ADDPART_FLAG_READONLY;
|
||||
|
||||
info = &state->parts[slot].info;
|
||||
|
||||
strscpy(info->volname, subpart->name, sizeof(info->volname));
|
||||
|
@ -43,6 +43,9 @@ static int (*const check_part[])(struct parsed_partitions *) = {
|
||||
#ifdef CONFIG_CMDLINE_PARTITION
|
||||
cmdline_partition,
|
||||
#endif
|
||||
#ifdef CONFIG_OF_PARTITION
|
||||
of_partition, /* cmdline has priority over OF */
|
||||
#endif
|
||||
#ifdef CONFIG_EFI_PARTITION
|
||||
efi_partition, /* this must come before msdos */
|
||||
#endif
|
||||
@ -253,6 +256,8 @@ static int part_uevent(const struct device *dev, struct kobj_uevent_env *env)
|
||||
add_uevent_var(env, "PARTN=%u", bdev_partno(part));
|
||||
if (part->bd_meta_info && part->bd_meta_info->volname[0])
|
||||
add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname);
|
||||
if (part->bd_meta_info && part->bd_meta_info->uuid[0])
|
||||
add_uevent_var(env, "PARTUUID=%s", part->bd_meta_info->uuid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -373,6 +378,9 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
goto out_del;
|
||||
}
|
||||
|
||||
if (flags & ADDPART_FLAG_READONLY)
|
||||
bdev_set_flag(bdev, BD_READ_ONLY);
|
||||
|
||||
/* everything is up and running, commence */
|
||||
err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL);
|
||||
if (err)
|
||||
|
block/partitions/of.c (new file, 110 lines)
@ -0,0 +1,110 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/major.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/string.h>
|
||||
#include "check.h"
|
||||
|
||||
static int validate_of_partition(struct device_node *np, int slot)
|
||||
{
|
||||
u64 offset, size;
|
||||
int len;
|
||||
|
||||
const __be32 *reg = of_get_property(np, "reg", &len);
|
||||
int a_cells = of_n_addr_cells(np);
|
||||
int s_cells = of_n_size_cells(np);
|
||||
|
||||
/* Make sure reg len match the expected addr and size cells */
|
||||
if (len / sizeof(*reg) != a_cells + s_cells)
|
||||
return -EINVAL;
|
||||
|
||||
/* Validate offset conversion from bytes to sectors */
|
||||
offset = of_read_number(reg, a_cells);
|
||||
if (offset % SECTOR_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
/* Validate size conversion from bytes to sectors */
|
||||
size = of_read_number(reg + a_cells, s_cells);
|
||||
if (!size || size % SECTOR_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void add_of_partition(struct parsed_partitions *state, int slot,
|
||||
struct device_node *np)
|
||||
{
|
||||
struct partition_meta_info *info;
|
||||
char tmp[sizeof(info->volname) + 4];
|
||||
const char *partname;
|
||||
int len;
|
||||
|
||||
const __be32 *reg = of_get_property(np, "reg", &len);
|
||||
int a_cells = of_n_addr_cells(np);
|
||||
int s_cells = of_n_size_cells(np);
|
||||
|
||||
/* Convert bytes to sector size */
|
||||
u64 offset = of_read_number(reg, a_cells) / SECTOR_SIZE;
|
||||
u64 size = of_read_number(reg + a_cells, s_cells) / SECTOR_SIZE;
|
||||
|
||||
put_partition(state, slot, offset, size);
|
||||
|
||||
if (of_property_read_bool(np, "read-only"))
|
||||
state->parts[slot].flags |= ADDPART_FLAG_READONLY;
|
||||
|
||||
/*
|
||||
* Follow MTD label logic, search for label property,
|
||||
* fallback to node name if not found.
|
||||
*/
|
||||
info = &state->parts[slot].info;
|
||||
partname = of_get_property(np, "label", &len);
|
||||
if (!partname)
|
||||
partname = of_get_property(np, "name", &len);
|
||||
strscpy(info->volname, partname, sizeof(info->volname));
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
}
|
||||
|
||||
int of_partition(struct parsed_partitions *state)
|
||||
{
|
||||
struct device *ddev = disk_to_dev(state->disk);
|
||||
struct device_node *np;
|
||||
int slot;
|
||||
|
||||
struct device_node *partitions_np = of_node_get(ddev->of_node);
|
||||
|
||||
if (!partitions_np ||
|
||||
!of_device_is_compatible(partitions_np, "fixed-partitions"))
|
||||
return 0;
|
||||
|
||||
slot = 1;
|
||||
/* Validate partition offset and size */
|
||||
for_each_child_of_node(partitions_np, np) {
|
||||
if (validate_of_partition(np, slot)) {
|
||||
of_node_put(np);
|
||||
of_node_put(partitions_np);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
slot++;
|
||||
}
|
||||
|
||||
slot = 1;
|
||||
for_each_child_of_node(partitions_np, np) {
|
||||
if (slot >= state->limit) {
|
||||
of_node_put(np);
|
||||
break;
|
||||
}
|
||||
|
||||
add_of_partition(state, slot, np);
|
||||
|
||||
slot++;
|
||||
}
|
||||
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
|
||||
return 1;
|
||||
}
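The reg parsing above reduces to combining address/size cells into 64-bit values and checking sector alignment. A small userspace model of that step is shown below; the cells are given in host order for brevity, whereas the kernel's of_read_number() also handles the big-endian conversion.

/* Userspace model of the reg-property maths above; not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE 512

/* Combine n 32-bit cells (host order here) into one 64-bit value. */
static uint64_t read_cells(const uint32_t *cells, int n)
{
	uint64_t v = 0;

	while (n--)
		v = (v << 32) | *cells++;
	return v;
}

int main(void)
{
	/* Example "reg" with #address-cells = 2 and #size-cells = 2:
	 * offset 1 MiB, size 4 MiB. */
	uint32_t reg[] = { 0, 0x100000, 0, 0x400000 };
	uint64_t offset = read_cells(reg, 2);
	uint64_t size = read_cells(reg + 2, 2);

	if (offset % SECTOR_SIZE || !size || size % SECTOR_SIZE)
		return 1;   /* reject, as validate_of_partition() does */

	printf("start sector %llu, %llu sectors\n",
	       (unsigned long long)(offset / SECTOR_SIZE),
	       (unsigned long long)(size / SECTOR_SIZE));
	return 0;
}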
|
@ -3037,6 +3037,29 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_set_new_sid_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
|
||||
{
|
||||
int ret;
|
||||
struct opal_key *newkey = &opal_pw->new_user_pw.opal_key;
|
||||
struct opal_key *oldkey = &opal_pw->session.opal_key;
|
||||
|
||||
const struct opal_step pw_steps[] = {
|
||||
{ start_SIDASP_opal_session, oldkey },
|
||||
{ set_sid_cpin_pin, newkey },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps));
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
|
||||
return ret;
|
||||
}
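Userspace reaches opal_set_new_sid_pw() through the IOC_OPAL_SET_SID_PW ioctl added below. A hedged usage sketch follows; the field names are assumed to match the uapi sed-opal header, and the device path and passwords are examples only.

/* Hedged userspace sketch; error handling trimmed, values are examples. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/sed-opal.h>

int main(void)
{
	struct opal_new_pw pw = { 0 };
	int fd = open("/dev/nvme0n1", O_RDWR);

	if (fd < 0)
		return 1;

	/* Current SID credential, used to open the admin session. */
	pw.session.opal_key.key_len = strlen("old-sid-password");
	memcpy(pw.session.opal_key.key, "old-sid-password",
	       pw.session.opal_key.key_len);

	/* New SID C_PIN to program. */
	pw.new_user_pw.opal_key.key_len = strlen("new-sid-password");
	memcpy(pw.new_user_pw.opal_key.key, "new-sid-password",
	       pw.new_user_pw.opal_key.key_len);

	if (ioctl(fd, IOC_OPAL_SET_SID_PW, &pw) < 0)
		return 1;
	close(fd);
	return 0;
}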
|
||||
|
||||
static int opal_activate_user(struct opal_dev *dev,
|
||||
struct opal_session_info *opal_session)
|
||||
{
|
||||
@ -3286,6 +3309,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
|
||||
case IOC_OPAL_DISCOVERY:
|
||||
ret = opal_get_discv(dev, p);
|
||||
break;
|
||||
case IOC_OPAL_SET_SID_PW:
|
||||
ret = opal_set_new_sid_pw(dev, p);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
|
@ -316,8 +316,40 @@ __setup("ramdisk_size=", ramdisk_size);
|
||||
* (should share code eventually).
|
||||
*/
|
||||
static LIST_HEAD(brd_devices);
|
||||
static DEFINE_MUTEX(brd_devices_mutex);
|
||||
static struct dentry *brd_debugfs_dir;
|
||||
|
||||
static struct brd_device *brd_find_or_alloc_device(int i)
|
||||
{
|
||||
struct brd_device *brd;
|
||||
|
||||
mutex_lock(&brd_devices_mutex);
|
||||
list_for_each_entry(brd, &brd_devices, brd_list) {
|
||||
if (brd->brd_number == i) {
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
return ERR_PTR(-EEXIST);
|
||||
}
|
||||
}
|
||||
|
||||
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
|
||||
if (!brd) {
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
brd->brd_number = i;
|
||||
list_add_tail(&brd->brd_list, &brd_devices);
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
return brd;
|
||||
}
|
||||
|
||||
static void brd_free_device(struct brd_device *brd)
|
||||
{
|
||||
mutex_lock(&brd_devices_mutex);
|
||||
list_del(&brd->brd_list);
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
kfree(brd);
|
||||
}
|
||||
|
||||
static int brd_alloc(int i)
|
||||
{
|
||||
struct brd_device *brd;
|
||||
@ -340,14 +372,9 @@ static int brd_alloc(int i)
|
||||
BLK_FEAT_NOWAIT,
|
||||
};
|
||||
|
||||
list_for_each_entry(brd, &brd_devices, brd_list)
|
||||
if (brd->brd_number == i)
|
||||
return -EEXIST;
|
||||
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
|
||||
if (!brd)
|
||||
return -ENOMEM;
|
||||
brd->brd_number = i;
|
||||
list_add_tail(&brd->brd_list, &brd_devices);
|
||||
brd = brd_find_or_alloc_device(i);
|
||||
if (IS_ERR(brd))
|
||||
return PTR_ERR(brd);
|
||||
|
||||
xa_init(&brd->brd_pages);
|
||||
|
||||
@ -378,8 +405,7 @@ static int brd_alloc(int i)
|
||||
out_cleanup_disk:
|
||||
put_disk(disk);
|
||||
out_free_dev:
|
||||
list_del(&brd->brd_list);
|
||||
kfree(brd);
|
||||
brd_free_device(brd);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -398,8 +424,7 @@ static void brd_cleanup(void)
|
||||
del_gendisk(brd->brd_disk);
|
||||
put_disk(brd->brd_disk);
|
||||
brd_free_pages(brd);
|
||||
list_del(&brd->brd_list);
|
||||
kfree(brd);
|
||||
brd_free_device(brd);
|
||||
}
|
||||
}
|
||||
|
||||
@ -426,16 +451,6 @@ static int __init brd_init(void)
|
||||
{
|
||||
int err, i;
|
||||
|
||||
brd_check_and_reset_par();
|
||||
|
||||
brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
|
||||
|
||||
for (i = 0; i < rd_nr; i++) {
|
||||
err = brd_alloc(i);
|
||||
if (err)
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
/*
|
||||
* brd module now has a feature to instantiate underlying device
|
||||
* structure on-demand, provided that there is an access dev node.
|
||||
@ -451,11 +466,18 @@ static int __init brd_init(void)
|
||||
* dynamically.
|
||||
*/
|
||||
|
||||
brd_check_and_reset_par();
|
||||
|
||||
brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
|
||||
|
||||
if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
|
||||
err = -EIO;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
for (i = 0; i < rd_nr; i++)
|
||||
brd_alloc(i);
|
||||
|
||||
pr_info("brd: module loaded\n");
|
||||
return 0;
|
||||
|
||||
|
@ -173,7 +173,7 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
|
||||
static bool lo_bdev_can_use_dio(struct loop_device *lo,
|
||||
struct block_device *backing_bdev)
|
||||
{
|
||||
unsigned short sb_bsize = bdev_logical_block_size(backing_bdev);
|
||||
unsigned int sb_bsize = bdev_logical_block_size(backing_bdev);
|
||||
|
||||
if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
|
||||
return false;
|
||||
@ -786,11 +786,10 @@ static void loop_config_discard(struct loop_device *lo,
|
||||
* file-backed loop devices: discarded regions read back as zero.
|
||||
*/
|
||||
if (S_ISBLK(inode->i_mode)) {
|
||||
struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
|
||||
struct block_device *bdev = I_BDEV(inode);
|
||||
|
||||
max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
|
||||
granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
|
||||
queue_physical_block_size(backingq);
|
||||
max_discard_sectors = bdev_write_zeroes_sectors(bdev);
|
||||
granularity = bdev_discard_granularity(bdev);
|
||||
|
||||
/*
|
||||
* We use punch hole to reclaim the free space used by the
|
||||
@ -977,7 +976,7 @@ loop_set_status_from_info(struct loop_device *lo,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned short loop_default_blocksize(struct loop_device *lo,
|
||||
static unsigned int loop_default_blocksize(struct loop_device *lo,
|
||||
struct block_device *backing_bdev)
|
||||
{
|
||||
/* In case of direct I/O, match underlying block size */
|
||||
@ -986,7 +985,7 @@ static unsigned short loop_default_blocksize(struct loop_device *lo,
|
||||
return SECTOR_SIZE;
|
||||
}
|
||||
|
||||
static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize)
|
||||
static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize)
|
||||
{
|
||||
struct file *file = lo->lo_backing_file;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
|
@ -2701,7 +2701,12 @@ static int mtip_hw_init(struct driver_data *dd)
|
||||
int rv;
|
||||
unsigned long timeout, timetaken;
|
||||
|
||||
dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
|
||||
dd->mmio = pcim_iomap_region(dd->pdev, MTIP_ABAR, MTIP_DRV_NAME);
|
||||
if (IS_ERR(dd->mmio)) {
|
||||
dev_err(&dd->pdev->dev, "Unable to request / ioremap PCI region\n");
|
||||
return PTR_ERR(dd->mmio);
|
||||
}
|
||||
|
||||
|
||||
mtip_detect_product(dd);
|
||||
if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
|
||||
@ -3710,13 +3715,6 @@ static int mtip_pci_probe(struct pci_dev *pdev,
|
||||
goto iomap_err;
|
||||
}
|
||||
|
||||
/* Map BAR5 to memory. */
|
||||
rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
|
||||
if (rv < 0) {
|
||||
dev_err(&pdev->dev, "Unable to map regions\n");
|
||||
goto iomap_err;
|
||||
}
|
||||
|
||||
rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
|
||||
if (rv) {
|
||||
dev_warn(&pdev->dev, "64-bit DMA enable failed\n");
|
||||
|
@ -1638,10 +1638,9 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void null_queue_rqs(struct request **rqlist)
|
||||
static void null_queue_rqs(struct rq_list *rqlist)
|
||||
{
|
||||
struct request *requeue_list = NULL;
|
||||
struct request **requeue_lastp = &requeue_list;
|
||||
struct rq_list requeue_list = {};
|
||||
struct blk_mq_queue_data bd = { };
|
||||
blk_status_t ret;
|
||||
|
||||
@ -1651,8 +1650,8 @@ static void null_queue_rqs(struct request **rqlist)
|
||||
bd.rq = rq;
|
||||
ret = null_queue_rq(rq->mq_hctx, &bd);
|
||||
if (ret != BLK_STS_OK)
|
||||
rq_list_add_tail(&requeue_lastp, rq);
|
||||
} while (!rq_list_empty(*rqlist));
|
||||
rq_list_add_tail(&requeue_list, rq);
|
||||
} while (!rq_list_empty(rqlist));
|
||||
|
||||
*rqlist = requeue_list;
|
||||
}
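For context, the open-coded request/requeue pointer pairs are replaced by the new struct rq_list type from this series. A hedged skeleton of a ->queue_rqs() handler after the conversion is shown below; my_driver_queue_one() stands in for the driver's per-request submission and is not a real function, and rq_list_pop() is assumed to return requests in submission (FIFO) order, which the "don't reorder requests" changes rely on.

static blk_status_t my_driver_queue_one(struct request *rq);   /* driver-specific */

static void my_queue_rqs(struct rq_list *rqlist)
{
	struct rq_list requeue_list = {};
	struct request *rq;

	while ((rq = rq_list_pop(rqlist))) {
		if (my_driver_queue_one(rq) != BLK_STS_OK)
			rq_list_add_tail(&requeue_list, rq);  /* preserve FIFO order */
	}

	/* Hand anything we could not queue back to the block layer. */
	*rqlist = requeue_list;
}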
|
||||
|
@ -166,7 +166,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
|
||||
|
||||
lim->features |= BLK_FEAT_ZONED;
|
||||
lim->chunk_sectors = dev->zone_size_sects;
|
||||
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
|
||||
lim->max_hw_zone_append_sectors = dev->zone_append_max_sectors;
|
||||
lim->max_open_zones = dev->zone_max_open;
|
||||
lim->max_active_zones = dev->zone_max_active;
|
||||
return 0;
|
||||
|
@ -7284,6 +7284,7 @@ static ssize_t do_rbd_remove(const char *buf, size_t count)
|
||||
*/
|
||||
blk_mq_freeze_queue(rbd_dev->disk->queue);
|
||||
blk_mark_disk_dead(rbd_dev->disk);
|
||||
blk_mq_unfreeze_queue(rbd_dev->disk->queue);
|
||||
}
|
||||
|
||||
del_gendisk(rbd_dev->disk);
|
||||
|
@ -60,7 +60,12 @@
|
||||
| UBLK_F_UNPRIVILEGED_DEV \
|
||||
| UBLK_F_CMD_IOCTL_ENCODE \
|
||||
| UBLK_F_USER_COPY \
|
||||
| UBLK_F_ZONED)
|
||||
| UBLK_F_ZONED \
|
||||
| UBLK_F_USER_RECOVERY_FAIL_IO)
|
||||
|
||||
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
|
||||
| UBLK_F_USER_RECOVERY_REISSUE \
|
||||
| UBLK_F_USER_RECOVERY_FAIL_IO)
|
||||
|
||||
/* All UBLK_PARAM_TYPE_* should be included here */
|
||||
#define UBLK_PARAM_TYPE_ALL \
|
||||
@ -143,6 +148,7 @@ struct ublk_queue {
|
||||
bool force_abort;
|
||||
bool timeout;
|
||||
bool canceling;
|
||||
bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
|
||||
unsigned short nr_io_ready; /* how many ios setup */
|
||||
spinlock_t cancel_lock;
|
||||
struct ublk_device *dev;
|
||||
@ -179,8 +185,7 @@ struct ublk_device {
|
||||
unsigned int nr_queues_ready;
|
||||
unsigned int nr_privileged_daemon;
|
||||
|
||||
struct work_struct quiesce_work;
|
||||
struct work_struct stop_work;
|
||||
struct work_struct nosrv_work;
|
||||
};
|
||||
|
||||
/* header of ublk_params */
|
||||
@ -664,30 +669,69 @@ static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)
|
||||
return ublk_get_queue(ub, q_id)->io_cmd_buf;
|
||||
}
|
||||
|
||||
static inline int __ublk_queue_cmd_buf_size(int depth)
|
||||
{
|
||||
return round_up(depth * sizeof(struct ublksrv_io_desc), PAGE_SIZE);
|
||||
}
|
||||
|
||||
static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
|
||||
{
|
||||
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
|
||||
|
||||
return round_up(ubq->q_depth * sizeof(struct ublksrv_io_desc),
|
||||
PAGE_SIZE);
|
||||
return __ublk_queue_cmd_buf_size(ubq->q_depth);
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_can_use_recovery_reissue(
|
||||
struct ublk_queue *ubq)
|
||||
static int ublk_max_cmd_buf_size(void)
|
||||
{
|
||||
return __ublk_queue_cmd_buf_size(UBLK_MAX_QUEUE_DEPTH);
|
||||
}
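The descriptor buffer that the ublk server mmap()s is therefore always a whole number of pages, whatever the kernel's page size. A small userspace model of the size calculation follows; the depth, descriptor size, and page size are illustrative values, not the driver's constants.

/* Userspace model of __ublk_queue_cmd_buf_size(); values are examples. */
#include <stdio.h>

static unsigned long round_up_pow2(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);   /* a must be a power of two */
}

int main(void)
{
	unsigned long page_size = 65536;   /* e.g. a 64K-page arm64 kernel */
	unsigned long depth = 128;         /* example queue depth */
	unsigned long desc_size = 24;      /* example descriptor size in bytes */

	printf("cmd buf size: %lu bytes\n",
	       round_up_pow2(depth * desc_size, page_size));  /* prints 65536 */
	return 0;
}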
|
||||
|
||||
/*
|
||||
* When the ublk server exits, should I/O outstanding to it be reissued?
|
||||
* If not, outstanding I/O will get errors.
|
||||
*/
|
||||
static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub)
|
||||
{
|
||||
return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) &&
|
||||
(ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Should I/O issued while there is no ublk server be queued? If not, I/O
* issued while there is no ublk server will get errors.
|
||||
*/
|
||||
static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub)
|
||||
{
|
||||
return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) &&
|
||||
!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_FAIL_IO);
|
||||
}
|
||||
|
||||
/*
|
||||
* Same as ublk_nosrv_dev_should_queue_io, but uses a queue-local copy
|
||||
* of the device flags for smaller cache footprint - better for fast
|
||||
* paths.
|
||||
*/
|
||||
static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq)
|
||||
{
|
||||
return (ubq->flags & UBLK_F_USER_RECOVERY) &&
|
||||
(ubq->flags & UBLK_F_USER_RECOVERY_REISSUE);
|
||||
!(ubq->flags & UBLK_F_USER_RECOVERY_FAIL_IO);
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_can_use_recovery(
|
||||
struct ublk_queue *ubq)
|
||||
/*
|
||||
* Should ublk devices be stopped (i.e. no recovery possible) when the
|
||||
* ublk server exits? If not, devices can be used again by a future
|
||||
* incarnation of a ublk server via the start_recovery/end_recovery
|
||||
* commands.
|
||||
*/
|
||||
static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub)
|
||||
{
|
||||
return ubq->flags & UBLK_F_USER_RECOVERY;
|
||||
return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY);
|
||||
}
|
||||
|
||||
static inline bool ublk_can_use_recovery(struct ublk_device *ub)
|
||||
static inline bool ublk_dev_in_recoverable_state(struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
|
||||
return ub->dev_info.state == UBLK_S_DEV_QUIESCED ||
|
||||
ub->dev_info.state == UBLK_S_DEV_FAIL_IO;
|
||||
}
|
||||
|
||||
static void ublk_free_disk(struct gendisk *disk)
|
||||
@@ -1063,7 +1107,7 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);

if (ublk_queue_can_use_recovery_reissue(ubq))
if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
blk_mq_requeue_request(req, false);
else
ublk_put_req_ref(ubq, req);

@@ -1091,7 +1135,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
struct request *rq)
{
/* We cannot process this rq so just requeue it. */
if (ublk_queue_can_use_recovery(ubq))
if (ublk_nosrv_dev_should_queue_io(ubq->dev))
blk_mq_requeue_request(rq, false);
else
blk_mq_end_request(rq, BLK_STS_IOERR);

@@ -1236,10 +1280,7 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
struct ublk_device *ub = ubq->dev;

if (ublk_abort_requests(ub, ubq)) {
if (ublk_can_use_recovery(ub))
schedule_work(&ub->quiesce_work);
else
schedule_work(&ub->stop_work);
schedule_work(&ub->nosrv_work);
}
return BLK_EH_DONE;
}
@@ -1254,6 +1295,10 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
struct request *rq = bd->rq;
blk_status_t res;

if (unlikely(ubq->fail_io)) {
return BLK_STS_TARGET;
}

/* fill iod to slot in io cmd buffer */
res = ublk_setup_iod(ubq, rq);
if (unlikely(res != BLK_STS_OK))

@@ -1268,7 +1313,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
* Note: force_abort is guaranteed to be seen because it is set
* before the request queue is unquiesced.
*/
if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
return BLK_STS_IOERR;

if (unlikely(ubq->canceling)) {

@@ -1322,7 +1367,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ublk_device *ub = filp->private_data;
size_t sz = vma->vm_end - vma->vm_start;
unsigned max_sz = UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc);
unsigned max_sz = ublk_max_cmd_buf_size();
unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT;
int q_id, ret = 0;

@@ -1489,10 +1534,7 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
ublk_cancel_cmd(ubq, io, issue_flags);

if (need_schedule) {
if (ublk_can_use_recovery(ub))
schedule_work(&ub->quiesce_work);
else
schedule_work(&ub->stop_work);
schedule_work(&ub->nosrv_work);
}
}
@@ -1555,20 +1597,6 @@ static void __ublk_quiesce_dev(struct ublk_device *ub)
ub->dev_info.state = UBLK_S_DEV_QUIESCED;
}

static void ublk_quiesce_work_fn(struct work_struct *work)
{
struct ublk_device *ub =
container_of(work, struct ublk_device, quiesce_work);

mutex_lock(&ub->mutex);
if (ub->dev_info.state != UBLK_S_DEV_LIVE)
goto unlock;
__ublk_quiesce_dev(ub);
unlock:
mutex_unlock(&ub->mutex);
ublk_cancel_dev(ub);
}

static void ublk_unquiesce_dev(struct ublk_device *ub)
{
int i;

@@ -1597,7 +1625,7 @@ static void ublk_stop_dev(struct ublk_device *ub)
mutex_lock(&ub->mutex);
if (ub->dev_info.state == UBLK_S_DEV_DEAD)
goto unlock;
if (ublk_can_use_recovery(ub)) {
if (ublk_nosrv_dev_should_queue_io(ub)) {
if (ub->dev_info.state == UBLK_S_DEV_LIVE)
__ublk_quiesce_dev(ub);
ublk_unquiesce_dev(ub);

@@ -1617,6 +1645,37 @@ static void ublk_stop_dev(struct ublk_device *ub)
ublk_cancel_dev(ub);
}

static void ublk_nosrv_work(struct work_struct *work)
{
struct ublk_device *ub =
container_of(work, struct ublk_device, nosrv_work);
int i;

if (ublk_nosrv_should_stop_dev(ub)) {
ublk_stop_dev(ub);
return;
}

mutex_lock(&ub->mutex);
if (ub->dev_info.state != UBLK_S_DEV_LIVE)
goto unlock;

if (ublk_nosrv_dev_should_queue_io(ub)) {
__ublk_quiesce_dev(ub);
} else {
blk_mq_quiesce_queue(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
ublk_get_queue(ub, i)->fail_io = true;
}
blk_mq_unquiesce_queue(ub->ub_disk->queue);
}

unlock:
mutex_unlock(&ub->mutex);
ublk_cancel_dev(ub);
}

/* device can only be started after all IOs are ready */
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
{
@@ -2130,14 +2189,6 @@ static int ublk_add_chdev(struct ublk_device *ub)
return ret;
}

static void ublk_stop_work_fn(struct work_struct *work)
{
struct ublk_device *ub =
container_of(work, struct ublk_device, stop_work);

ublk_stop_dev(ub);
}

/* align max io buffer size with PAGE_SIZE */
static void ublk_align_max_io_size(struct ublk_device *ub)
{

@@ -2162,8 +2213,7 @@ static int ublk_add_tag_set(struct ublk_device *ub)
static void ublk_remove(struct ublk_device *ub)
{
ublk_stop_dev(ub);
cancel_work_sync(&ub->stop_work);
cancel_work_sync(&ub->quiesce_work);
cancel_work_sync(&ub->nosrv_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
ublk_put_device(ub);
ublks_added--;

@@ -2229,7 +2279,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
lim.features |= BLK_FEAT_ZONED;
lim.max_active_zones = p->max_active_zones;
lim.max_open_zones = p->max_open_zones;
lim.max_zone_append_sectors = p->max_zone_append_sectors;
lim.max_hw_zone_append_sectors = p->max_zone_append_sectors;
}

if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) {

@@ -2372,6 +2422,19 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
return -EPERM;

/* forbid nonsense combinations of recovery flags */
switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) {
case 0:
case UBLK_F_USER_RECOVERY:
case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE):
case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO):
break;
default:
pr_warn("%s: invalid recovery flags %llx\n", __func__,
info.flags & UBLK_F_ALL_RECOVERY_FLAGS);
return -EINVAL;
}

/*
* unprivileged device can't be trusted, but RECOVERY and
* RECOVERY_REISSUE still may hang error handling, so can't

@@ -2424,8 +2487,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
goto out_unlock;
mutex_init(&ub->mutex);
spin_lock_init(&ub->lock);
INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn);
INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
INIT_WORK(&ub->nosrv_work, ublk_nosrv_work);

ret = ublk_alloc_dev_number(ub, header->dev_id);
if (ret < 0)

@@ -2560,9 +2622,7 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
static int ublk_ctrl_stop_dev(struct ublk_device *ub)
{
ublk_stop_dev(ub);
cancel_work_sync(&ub->stop_work);
cancel_work_sync(&ub->quiesce_work);
cancel_work_sync(&ub->nosrv_work);
return 0;
}
@@ -2699,7 +2759,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
int i;

mutex_lock(&ub->mutex);
if (!ublk_can_use_recovery(ub))
if (ublk_nosrv_should_stop_dev(ub))
goto out_unlock;
if (!ub->nr_queues_ready)
goto out_unlock;

@@ -2710,14 +2770,18 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
* and related io_uring ctx is freed so file struct of /dev/ublkcX is
* released.
*
* and one of the following holds
*
* (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work:
* (a) has quiesced the request queue
* (b) has requeued every inflight rq whose io_flags is ACTIVE
* (c) has requeued/aborted every inflight rq whose io_flags is NOT ACTIVE
* (d) has completed/canceled all ioucmds owned by the dying process
*
* (3) UBLK_S_DEV_FAIL_IO is set, which means the queue is not
* quiesced, but all I/O is being immediately errored
*/
if (test_bit(UB_STATE_OPEN, &ub->state) ||
ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
if (test_bit(UB_STATE_OPEN, &ub->state) || !ublk_dev_in_recoverable_state(ub)) {
ret = -EBUSY;
goto out_unlock;
}

@@ -2741,6 +2805,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
int i;

pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
__func__, ub->dev_info.nr_hw_queues, header->dev_id);

@@ -2752,21 +2817,32 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
__func__, ub->dev_info.nr_hw_queues, header->dev_id);

mutex_lock(&ub->mutex);
if (!ublk_can_use_recovery(ub))
if (ublk_nosrv_should_stop_dev(ub))
goto out_unlock;

if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
if (!ublk_dev_in_recoverable_state(ub)) {
ret = -EBUSY;
goto out_unlock;
}
ub->dev_info.ublksrv_pid = ublksrv_pid;
pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
__func__, ublksrv_pid, header->dev_id);
blk_mq_unquiesce_queue(ub->ub_disk->queue);
pr_devel("%s: queue unquiesced, dev id %d.\n",
__func__, header->dev_id);
blk_mq_kick_requeue_list(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_LIVE;

if (ublk_nosrv_dev_should_queue_io(ub)) {
ub->dev_info.state = UBLK_S_DEV_LIVE;
blk_mq_unquiesce_queue(ub->ub_disk->queue);
pr_devel("%s: queue unquiesced, dev id %d.\n",
__func__, header->dev_id);
blk_mq_kick_requeue_list(ub->ub_disk->queue);
} else {
blk_mq_quiesce_queue(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_LIVE;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
ublk_get_queue(ub, i)->fail_io = false;
}
blk_mq_unquiesce_queue(ub->ub_disk->queue);
}

ret = 0;
out_unlock:
mutex_unlock(&ub->mutex);
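
The ublk helpers rewritten above encode what happens to a device when its user-space server goes away, based on the UBLK_F_USER_RECOVERY, UBLK_F_USER_RECOVERY_REISSUE and UBLK_F_USER_RECOVERY_FAIL_IO combinations that ublk_ctrl_add_dev() now validates. A minimal user-space sketch of that decision table follows; the flag values and helper names here are illustrative stand-ins, not the ublk UAPI.

/* Illustrative model only: the flag bits are stand-ins, not ublk UAPI values. */
#include <stdbool.h>
#include <stdio.h>

#define F_USER_RECOVERY          (1u << 0)
#define F_USER_RECOVERY_REISSUE  (1u << 1)
#define F_USER_RECOVERY_FAIL_IO  (1u << 2)

/* Mirrors ublk_nosrv_should_stop_dev(): without RECOVERY the device is stopped. */
static bool should_stop_dev(unsigned int flags)
{
	return !(flags & F_USER_RECOVERY);
}

/* Mirrors ublk_nosrv_dev_should_queue_io(): quiesce and queue unless FAIL_IO is set. */
static bool should_queue_io(unsigned int flags)
{
	return (flags & F_USER_RECOVERY) && !(flags & F_USER_RECOVERY_FAIL_IO);
}

/* Mirrors ublk_nosrv_should_reissue_outstanding(). */
static bool should_reissue(unsigned int flags)
{
	return (flags & F_USER_RECOVERY) && (flags & F_USER_RECOVERY_REISSUE);
}

int main(void)
{
	/* The combinations accepted by the add_dev validation switch. */
	unsigned int combos[] = {
		0,
		F_USER_RECOVERY,
		F_USER_RECOVERY | F_USER_RECOVERY_REISSUE,
		F_USER_RECOVERY | F_USER_RECOVERY_FAIL_IO,
	};

	for (unsigned int i = 0; i < sizeof(combos) / sizeof(combos[0]); i++)
		printf("flags=0x%x stop_dev=%d queue_io=%d reissue=%d\n",
		       combos[i], should_stop_dev(combos[i]),
		       should_queue_io(combos[i]), should_reissue(combos[i]));
	return 0;
}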
@@ -471,18 +471,18 @@ static bool virtblk_prep_rq_batch(struct request *req)
return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}

static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
struct request **rqlist)
static void virtblk_add_req_batch(struct virtio_blk_vq *vq,
struct rq_list *rqlist)
{
struct request *req;
unsigned long flags;
int err;
bool kick;

spin_lock_irqsave(&vq->lock, flags);

while (!rq_list_empty(*rqlist)) {
struct request *req = rq_list_pop(rqlist);
while ((req = rq_list_pop(rqlist))) {
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
int err;

err = virtblk_add_req(vq->vq, vbr);
if (err) {

@@ -495,37 +495,32 @@ static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
kick = virtqueue_kick_prepare(vq->vq);
spin_unlock_irqrestore(&vq->lock, flags);

return kick;
if (kick)
virtqueue_notify(vq->vq);
}

static void virtio_queue_rqs(struct request **rqlist)
static void virtio_queue_rqs(struct rq_list *rqlist)
{
struct request *req, *next, *prev = NULL;
struct request *requeue_list = NULL;
struct rq_list submit_list = { };
struct rq_list requeue_list = { };
struct virtio_blk_vq *vq = NULL;
struct request *req;

rq_list_for_each_safe(rqlist, req, next) {
struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
bool kick;
while ((req = rq_list_pop(rqlist))) {
struct virtio_blk_vq *this_vq = get_virtio_blk_vq(req->mq_hctx);

if (!virtblk_prep_rq_batch(req)) {
rq_list_move(rqlist, &requeue_list, req, prev);
req = prev;
if (!req)
continue;
}
if (vq && vq != this_vq)
virtblk_add_req_batch(vq, &submit_list);
vq = this_vq;

if (!next || req->mq_hctx != next->mq_hctx) {
req->rq_next = NULL;
kick = virtblk_add_req_batch(vq, rqlist);
if (kick)
virtqueue_notify(vq->vq);

*rqlist = next;
prev = NULL;
} else
prev = req;
if (virtblk_prep_rq_batch(req))
rq_list_add_tail(&submit_list, req);
else
rq_list_add_tail(&requeue_list, req);
}

if (vq)
virtblk_add_req_batch(vq, &submit_list);
*rqlist = requeue_list;
}

@@ -784,7 +779,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
wg, v);
return -ENODEV;
}
lim->max_zone_append_sectors = v;
lim->max_hw_zone_append_sectors = v;
dev_dbg(&vdev->dev, "max append sectors = %u\n", v);

return 0;
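
The virtio_queue_rqs() rewrite above (mirrored by nvme_queue_rqs() later in this pull) drops the prev/next splicing and instead pops each request off the plug list, collects a local submit batch, and flushes that batch whenever the target queue changes, so requests are dispatched in the order they were plugged. A small user-space sketch of that pattern follows; the toy list type and helpers stand in for struct rq_list rather than the real block-layer API, and the requeue path for requests that fail preparation is omitted for brevity.

/* Toy model of "flush the batch whenever the target queue changes". */
#include <stdio.h>
#include <stdlib.h>

struct req {
	int queue_id;           /* hardware queue this request targets */
	struct req *next;
};

struct req_list {
	struct req *head, *tail;
};

static void list_add_tail(struct req_list *l, struct req *r)
{
	r->next = NULL;
	if (l->tail)
		l->tail->next = r;
	else
		l->head = r;
	l->tail = r;
}

static struct req *list_pop(struct req_list *l)
{
	struct req *r = l->head;

	if (r) {
		l->head = r->next;
		if (!l->head)
			l->tail = NULL;
	}
	return r;
}

/* One "doorbell" per contiguous run of requests for the same queue. */
static void submit_batch(int queue_id, struct req_list *batch)
{
	struct req *r;

	printf("queue %d:", queue_id);
	while ((r = list_pop(batch))) {
		printf(" req");
		free(r);
	}
	printf("\n");
}

static void queue_rqs(struct req_list *plug)
{
	struct req_list batch = { 0 };
	struct req *r;
	int cur = -1;

	while ((r = list_pop(plug))) {
		if (cur != -1 && r->queue_id != cur)
			submit_batch(cur, &batch);  /* queue changed: flush */
		cur = r->queue_id;
		list_add_tail(&batch, r);           /* plug order preserved */
	}
	if (cur != -1)
		submit_batch(cur, &batch);
}

int main(void)
{
	struct req_list plug = { 0 };
	int qids[] = { 0, 0, 1, 1, 0 };

	for (int i = 0; i < 5; i++) {
		struct req *r = malloc(sizeof(*r));

		r->queue_id = qids[i];
		list_add_tail(&plug, r);
	}
	queue_rqs(&plug);
	return 0;
}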
@ -3362,7 +3362,7 @@ static int cache_iterate_devices(struct dm_target *ti,
|
||||
static void disable_passdown_if_not_supported(struct cache *cache)
|
||||
{
|
||||
struct block_device *origin_bdev = cache->origin_dev->bdev;
|
||||
struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
|
||||
struct queue_limits *origin_limits = bdev_limits(origin_bdev);
|
||||
const char *reason = NULL;
|
||||
|
||||
if (!cache->features.discard_passdown)
|
||||
@ -3384,7 +3384,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
|
||||
static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
|
||||
{
|
||||
struct block_device *origin_bdev = cache->origin_dev->bdev;
|
||||
struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
|
||||
struct queue_limits *origin_limits = bdev_limits(origin_bdev);
|
||||
|
||||
if (!cache->features.discard_passdown) {
|
||||
/* No passdown is done so setting own virtual limits */
|
||||
|
@ -2020,7 +2020,7 @@ static void clone_resume(struct dm_target *ti)
|
||||
static void disable_passdown_if_not_supported(struct clone *clone)
|
||||
{
|
||||
struct block_device *dest_dev = clone->dest_dev->bdev;
|
||||
struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
|
||||
struct queue_limits *dest_limits = bdev_limits(dest_dev);
|
||||
const char *reason = NULL;
|
||||
|
||||
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
|
||||
@ -2041,7 +2041,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
|
||||
static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
|
||||
{
|
||||
struct block_device *dest_bdev = clone->dest_dev->bdev;
|
||||
struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits;
|
||||
struct queue_limits *dest_limits = bdev_limits(dest_bdev);
|
||||
|
||||
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
|
||||
/* No passdown is done so we set our own virtual limits */
|
||||
|
@ -2842,7 +2842,7 @@ static void disable_discard_passdown_if_not_supported(struct pool_c *pt)
|
||||
{
|
||||
struct pool *pool = pt->pool;
|
||||
struct block_device *data_bdev = pt->data_dev->bdev;
|
||||
struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
|
||||
struct queue_limits *data_limits = bdev_limits(data_bdev);
|
||||
const char *reason = NULL;
|
||||
|
||||
if (!pt->adjusted_pf.discard_passdown)
|
||||
|
@ -344,7 +344,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
} else {
|
||||
set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->max_hw_zone_append_sectors = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -379,7 +379,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
if (!zlim.mapped_nr_seq_zones) {
|
||||
lim->max_open_zones = 0;
|
||||
lim->max_active_zones = 0;
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->max_hw_zone_append_sectors = 0;
|
||||
lim->zone_write_granularity = 0;
|
||||
lim->chunk_sectors = 0;
|
||||
lim->features &= ~BLK_FEAT_ZONED;
|
||||
|
@ -1285,6 +1285,7 @@ static void bitmap_unplug_async(struct bitmap *bitmap)
|
||||
|
||||
queue_work(md_bitmap_wq, &unplug_work.work);
|
||||
wait_for_completion(&done);
|
||||
destroy_work_on_stack(&unplug_work.work);
|
||||
}
|
||||
|
||||
static void bitmap_unplug(struct mddev *mddev, bool sync)
|
||||
|
@ -9784,9 +9784,7 @@ EXPORT_SYMBOL(md_reap_sync_thread);
|
||||
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
{
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
wait_event_timeout(rdev->blocked_wait,
|
||||
!test_bit(Blocked, &rdev->flags) &&
|
||||
!test_bit(BlockedBadBlocks, &rdev->flags),
|
||||
wait_event_timeout(rdev->blocked_wait, !rdev_blocked(rdev),
|
||||
msecs_to_jiffies(5000));
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
}
|
||||
@ -9815,6 +9813,17 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
{
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* Recording new badblocks for faulty rdev will force unnecessary
|
||||
* super block updating. This is fragile for external management because
|
||||
* userspace daemon may trying to remove this device and deadlock may
|
||||
* occur. This will be probably solved in the mdadm, but it is safer to
|
||||
* avoid it.
|
||||
*/
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
return 1;
|
||||
|
||||
if (is_new)
|
||||
s += rdev->new_data_offset;
|
||||
else
|
||||
|
@@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
}

static inline bool rdev_blocked(struct md_rdev *rdev)
{
/*
* Blocked will be set by error handler and cleared by daemon after
* updating superblock, meanwhile write IO should be blocked to prevent
* reading old data after power failure.
*/
if (test_bit(Blocked, &rdev->flags))
return true;

/*
* Faulty device should not be accessed anymore, there is no need to
* wait for bad block to be acknowledged.
*/
if (test_bit(Faulty, &rdev->flags))
return false;

/* rdev is blocked by badblocks. */
if (test_bit(BlockedBadBlocks, &rdev->flags))
return true;

return false;
}

#define mddev_add_trace_msg(mddev, fmt, args...) \
do { \
if (!mddev_is_dm(mddev)) \
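
The new rdev_blocked() helper above centralizes a check that raid1, raid10 and raid5 previously open-coded, and the order of the tests matters: Blocked always wins, a Faulty device is never worth waiting for, and only then does BlockedBadBlocks count. A tiny stand-alone sketch of that precedence follows; the flag bits here are illustrative stand-ins for the md rdev flag enum.

/* Stand-in flag bits; the kernel tests bits of rdev->flags instead. */
#include <stdbool.h>
#include <stdio.h>

#define BLOCKED            (1u << 0)
#define FAULTY             (1u << 1)
#define BLOCKED_BADBLOCKS  (1u << 2)

/* Same precedence as rdev_blocked(). */
static bool rdev_blocked_model(unsigned int flags)
{
	if (flags & BLOCKED)
		return true;
	if (flags & FAULTY)
		return false;
	return flags & BLOCKED_BADBLOCKS;
}

int main(void)
{
	for (unsigned int f = 0; f < 8; f++)
		printf("blocked=%d faulty=%d badblocks=%d -> wait=%d\n",
		       !!(f & BLOCKED), !!(f & FAULTY),
		       !!(f & BLOCKED_BADBLOCKS), rdev_blocked_model(f));
	return 0;
}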
@ -466,6 +466,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
struct bio *split = bio_split(bio,
|
||||
zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
|
||||
if (IS_ERR(split)) {
|
||||
bio->bi_status = errno_to_blk_status(PTR_ERR(split));
|
||||
bio_endio(bio);
|
||||
return;
|
||||
}
|
||||
bio_chain(split, bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
@ -608,6 +614,12 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
if (sectors < bio_sectors(bio)) {
|
||||
struct bio *split = bio_split(bio, sectors, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
|
||||
if (IS_ERR(split)) {
|
||||
bio->bi_status = errno_to_blk_status(PTR_ERR(split));
|
||||
bio_endio(bio);
|
||||
return true;
|
||||
}
|
||||
bio_chain(split, bio);
|
||||
raid0_map_submit_bio(mddev, bio);
|
||||
bio = split;
|
||||
|
@ -1322,7 +1322,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
const enum req_op op = bio_op(bio);
|
||||
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
|
||||
int max_sectors;
|
||||
int rdisk;
|
||||
int rdisk, error;
|
||||
bool r1bio_existed = !!r1_bio;
|
||||
|
||||
/*
|
||||
@ -1383,6 +1383,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
if (max_sectors < bio_sectors(bio)) {
|
||||
struct bio *split = bio_split(bio, max_sectors,
|
||||
gfp, &conf->bio_split);
|
||||
|
||||
if (IS_ERR(split)) {
|
||||
error = PTR_ERR(split);
|
||||
goto err_handle;
|
||||
}
|
||||
bio_chain(split, bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
@ -1410,6 +1415,47 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
read_bio->bi_private = r1_bio;
|
||||
mddev_trace_remap(mddev, read_bio, r1_bio->sector);
|
||||
submit_bio_noacct(read_bio);
|
||||
return;
|
||||
|
||||
err_handle:
|
||||
atomic_dec(&mirror->rdev->nr_pending);
|
||||
bio->bi_status = errno_to_blk_status(error);
|
||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||
raid_end_bio_io(r1_bio);
|
||||
}
|
||||
|
||||
static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
int disks = conf->raid_disks * 2;
|
||||
int i;
|
||||
|
||||
retry:
|
||||
for (i = 0; i < disks; i++) {
|
||||
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||
|
||||
if (!rdev)
|
||||
continue;
|
||||
|
||||
/* don't write here until the bad block is acknowledged */
|
||||
if (test_bit(WriteErrorSeen, &rdev->flags) &&
|
||||
rdev_has_badblock(rdev, bio->bi_iter.bi_sector,
|
||||
bio_sectors(bio)) < 0)
|
||||
set_bit(BlockedBadBlocks, &rdev->flags);
|
||||
|
||||
if (rdev_blocked(rdev)) {
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
return false;
|
||||
|
||||
mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
|
||||
rdev->raid_disk);
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
md_wait_for_blocked_rdev(rdev, rdev->mddev);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
@ -1417,9 +1463,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
struct r1bio *r1_bio;
|
||||
int i, disks;
|
||||
int i, disks, k, error;
|
||||
unsigned long flags;
|
||||
struct md_rdev *blocked_rdev;
|
||||
int first_clone;
|
||||
int max_sectors;
|
||||
bool write_behind = false;
|
||||
@ -1457,7 +1502,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
return;
|
||||
}
|
||||
|
||||
retry_write:
|
||||
if (!wait_blocked_rdev(mddev, bio)) {
|
||||
bio_wouldblock_error(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
r1_bio = alloc_r1bio(mddev, bio);
|
||||
r1_bio->sectors = max_write_sectors;
|
||||
|
||||
@ -1473,7 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
*/
|
||||
|
||||
disks = conf->raid_disks * 2;
|
||||
blocked_rdev = NULL;
|
||||
max_sectors = r1_bio->sectors;
|
||||
for (i = 0; i < disks; i++) {
|
||||
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||
@ -1486,11 +1534,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
|
||||
write_behind = true;
|
||||
|
||||
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
blocked_rdev = rdev;
|
||||
break;
|
||||
}
|
||||
r1_bio->bios[i] = NULL;
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags)) {
|
||||
if (i < conf->raid_disks)
|
||||
@ -1506,13 +1549,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
|
||||
is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
|
||||
&first_bad, &bad_sectors);
|
||||
if (is_bad < 0) {
|
||||
/* mustn't write here until the bad block is
|
||||
* acknowledged*/
|
||||
set_bit(BlockedBadBlocks, &rdev->flags);
|
||||
blocked_rdev = rdev;
|
||||
break;
|
||||
}
|
||||
if (is_bad && first_bad <= r1_bio->sector) {
|
||||
/* Cannot write here at all */
|
||||
bad_sectors -= (r1_bio->sector - first_bad);
|
||||
@ -1543,27 +1579,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio->bios[i] = bio;
|
||||
}
|
||||
|
||||
if (unlikely(blocked_rdev)) {
|
||||
/* Wait for this device to become unblocked */
|
||||
int j;
|
||||
|
||||
for (j = 0; j < i; j++)
|
||||
if (r1_bio->bios[j])
|
||||
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
|
||||
mempool_free(r1_bio, &conf->r1bio_pool);
|
||||
allow_barrier(conf, bio->bi_iter.bi_sector);
|
||||
|
||||
if (bio->bi_opf & REQ_NOWAIT) {
|
||||
bio_wouldblock_error(bio);
|
||||
return;
|
||||
}
|
||||
mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
|
||||
blocked_rdev->raid_disk);
|
||||
md_wait_for_blocked_rdev(blocked_rdev, mddev);
|
||||
wait_barrier(conf, bio->bi_iter.bi_sector, false);
|
||||
goto retry_write;
|
||||
}
|
||||
|
||||
/*
|
||||
* When using a bitmap, we may call alloc_behind_master_bio below.
|
||||
* alloc_behind_master_bio allocates a copy of the data payload a page
|
||||
@ -1576,6 +1591,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
if (max_sectors < bio_sectors(bio)) {
|
||||
struct bio *split = bio_split(bio, max_sectors,
|
||||
GFP_NOIO, &conf->bio_split);
|
||||
|
||||
if (IS_ERR(split)) {
|
||||
error = PTR_ERR(split);
|
||||
goto err_handle;
|
||||
}
|
||||
bio_chain(split, bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
@ -1660,6 +1680,18 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
|
||||
/* In case raid1d snuck in to freeze_array */
|
||||
wake_up_barrier(conf);
|
||||
return;
|
||||
err_handle:
|
||||
for (k = 0; k < i; k++) {
|
||||
if (r1_bio->bios[k]) {
|
||||
rdev_dec_pending(conf->mirrors[k].rdev, mddev);
|
||||
r1_bio->bios[k] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bio->bi_status = errno_to_blk_status(error);
|
||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||
raid_end_bio_io(r1_bio);
|
||||
}
|
||||
|
||||
static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
|
||||
|
@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
int slot = r10_bio->read_slot;
|
||||
struct md_rdev *err_rdev = NULL;
|
||||
gfp_t gfp = GFP_NOIO;
|
||||
int error;
|
||||
|
||||
if (slot >= 0 && r10_bio->devs[slot].rdev) {
|
||||
/*
|
||||
@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
if (max_sectors < bio_sectors(bio)) {
|
||||
struct bio *split = bio_split(bio, max_sectors,
|
||||
gfp, &conf->bio_split);
|
||||
if (IS_ERR(split)) {
|
||||
error = PTR_ERR(split);
|
||||
goto err_handle;
|
||||
}
|
||||
bio_chain(split, bio);
|
||||
allow_barrier(conf);
|
||||
submit_bio_noacct(bio);
|
||||
@ -1236,6 +1241,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
mddev_trace_remap(mddev, read_bio, r10_bio->sector);
|
||||
submit_bio_noacct(read_bio);
|
||||
return;
|
||||
err_handle:
|
||||
atomic_dec(&rdev->nr_pending);
|
||||
bio->bi_status = errno_to_blk_status(error);
|
||||
set_bit(R10BIO_Uptodate, &r10_bio->state);
|
||||
raid_end_bio_io(r10_bio);
|
||||
}
|
||||
|
||||
static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
@ -1285,9 +1295,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
|
||||
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
{
|
||||
int i;
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct md_rdev *blocked_rdev;
|
||||
int i;
|
||||
|
||||
retry_wait:
|
||||
blocked_rdev = NULL;
|
||||
@@ -1295,40 +1305,36 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
struct md_rdev *rdev, *rrdev;

rdev = conf->mirrors[i].rdev;
rrdev = conf->mirrors[i].replacement;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
break;
}
if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
atomic_inc(&rrdev->nr_pending);
blocked_rdev = rrdev;
break;
}

if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
if (rdev) {
sector_t dev_sector = r10_bio->devs[i].addr;

/*
* A discard request doesn't care about the write result,
* so it doesn't need to wait for a blocked disk here.
*/
if (!r10_bio->sectors)
continue;

if (rdev_has_badblock(rdev, dev_sector,
r10_bio->sectors) < 0) {
if (test_bit(WriteErrorSeen, &rdev->flags) &&
r10_bio->sectors &&
rdev_has_badblock(rdev, dev_sector,
r10_bio->sectors) < 0)
/*
* Mustn't write here until the bad block
* is acknowledged
* Mustn't write here until the bad
* block is acknowledged
*/
atomic_inc(&rdev->nr_pending);
set_bit(BlockedBadBlocks, &rdev->flags);

if (rdev_blocked(rdev)) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
break;
}
}

rrdev = conf->mirrors[i].replacement;
if (rrdev && rdev_blocked(rrdev)) {
atomic_inc(&rrdev->nr_pending);
blocked_rdev = rrdev;
break;
}
}

if (unlikely(blocked_rdev)) {
@ -1347,9 +1353,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
struct r10bio *r10_bio)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
int i;
|
||||
int i, k;
|
||||
sector_t sectors;
|
||||
int max_sectors;
|
||||
int error;
|
||||
|
||||
if ((mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
@ -1482,6 +1489,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
if (r10_bio->sectors < bio_sectors(bio)) {
|
||||
struct bio *split = bio_split(bio, r10_bio->sectors,
|
||||
GFP_NOIO, &conf->bio_split);
|
||||
if (IS_ERR(split)) {
|
||||
error = PTR_ERR(split);
|
||||
goto err_handle;
|
||||
}
|
||||
bio_chain(split, bio);
|
||||
allow_barrier(conf);
|
||||
submit_bio_noacct(bio);
|
||||
@ -1503,6 +1514,26 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
raid10_write_one_disk(mddev, r10_bio, bio, true, i);
|
||||
}
|
||||
one_write_done(r10_bio);
|
||||
return;
|
||||
err_handle:
|
||||
for (k = 0; k < i; k++) {
|
||||
int d = r10_bio->devs[k].devnum;
|
||||
struct md_rdev *rdev = conf->mirrors[d].rdev;
|
||||
struct md_rdev *rrdev = conf->mirrors[d].replacement;
|
||||
|
||||
if (r10_bio->devs[k].bio) {
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
r10_bio->devs[k].bio = NULL;
|
||||
}
|
||||
if (r10_bio->devs[k].repl_bio) {
|
||||
rdev_dec_pending(rrdev, mddev);
|
||||
r10_bio->devs[k].repl_bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bio->bi_status = errno_to_blk_status(error);
|
||||
set_bit(R10BIO_Uptodate, &r10_bio->state);
|
||||
raid_end_bio_io(r10_bio);
|
||||
}
|
||||
|
||||
static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
|
||||
@@ -1644,6 +1675,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (remainder) {
split_size = stripe_size - remainder;
split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
if (IS_ERR(split)) {
bio->bi_status = errno_to_blk_status(PTR_ERR(split));
bio_endio(bio);
return 0;
}
bio_chain(split, bio);
allow_barrier(conf);
/* Resend the first split part */

@@ -1654,6 +1690,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (remainder) {
split_size = bio_sectors(bio) - remainder;
split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
if (IS_ERR(split)) {
bio->bi_status = errno_to_blk_status(PTR_ERR(split));
bio_endio(bio);
return 0;
}
bio_chain(split, bio);
allow_barrier(conf);
/* Resend the second split part */
@ -258,7 +258,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
|
||||
memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
|
||||
pplhdr->signature = cpu_to_le32(ppl_conf->signature);
|
||||
|
||||
io->seq = atomic64_add_return(1, &ppl_conf->seq);
|
||||
io->seq = atomic64_inc_return(&ppl_conf->seq);
|
||||
pplhdr->generation = cpu_to_le64(io->seq);
|
||||
|
||||
return io;
|
||||
|
@ -4724,14 +4724,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (rdev) {
|
||||
is_bad = rdev_has_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf));
|
||||
if (s->blocked_rdev == NULL
|
||||
&& (test_bit(Blocked, &rdev->flags)
|
||||
|| is_bad < 0)) {
|
||||
if (s->blocked_rdev == NULL) {
|
||||
if (is_bad < 0)
|
||||
set_bit(BlockedBadBlocks,
|
||||
&rdev->flags);
|
||||
s->blocked_rdev = rdev;
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
set_bit(BlockedBadBlocks, &rdev->flags);
|
||||
if (rdev_blocked(rdev)) {
|
||||
s->blocked_rdev = rdev;
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
}
|
||||
}
|
||||
}
|
||||
clear_bit(R5_Insync, &dev->flags);
|
||||
@ -7177,6 +7176,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
|
||||
err = mddev_suspend_and_lock(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
raid5_quiesce(mddev, true);
|
||||
|
||||
conf = mddev->private;
|
||||
if (!conf)
|
||||
err = -ENODEV;
|
||||
@ -7198,6 +7199,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
|
||||
kfree(old_groups);
|
||||
}
|
||||
}
|
||||
|
||||
raid5_quiesce(mddev, false);
|
||||
mddev_unlock_and_resume(mddev);
|
||||
|
||||
return err ?: len;
|
||||
|
@ -633,7 +633,7 @@ struct r5conf {
|
||||
* two caches.
|
||||
*/
|
||||
int active_name;
|
||||
char cache_name[2][32];
|
||||
char cache_name[2][48];
|
||||
struct kmem_cache *slab_cache; /* for allocating stripes */
|
||||
struct mutex cache_size_mutex; /* Protect changes to cache size */
|
||||
|
||||
|
@ -2501,6 +2501,56 @@ static inline int mmc_blk_readonly(struct mmc_card *card)
|
||||
!(card->csd.cmdclass & CCC_BLOCK_WRITE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for a declared partitions node for the disk in mmc-card related node.
|
||||
*
|
||||
* This is to permit support for partition table defined in DT in special case
|
||||
* where a partition table is not written in the disk and is expected to be
|
||||
* passed from the running system.
|
||||
*
|
||||
* For the user disk, "partitions" node is searched.
|
||||
* For the special HW disk, "partitions-" node with the appended name is used
|
||||
* following this conversion table (to adhere to JEDEC naming)
|
||||
* - boot0 -> partitions-boot1
|
||||
* - boot1 -> partitions-boot2
|
||||
* - gp0 -> partitions-gp1
|
||||
* - gp1 -> partitions-gp2
|
||||
* - gp2 -> partitions-gp3
|
||||
* - gp3 -> partitions-gp4
|
||||
*/
|
||||
static struct fwnode_handle *mmc_blk_get_partitions_node(struct device *mmc_dev,
|
||||
const char *subname)
|
||||
{
|
||||
const char *node_name = "partitions";
|
||||
|
||||
if (subname) {
|
||||
mmc_dev = mmc_dev->parent;
|
||||
|
||||
/*
|
||||
* Check if we are allocating a BOOT disk boot0/1 disk.
|
||||
* In DT we use the JEDEC naming boot1/2.
|
||||
*/
|
||||
if (!strcmp(subname, "boot0"))
|
||||
node_name = "partitions-boot1";
|
||||
if (!strcmp(subname, "boot1"))
|
||||
node_name = "partitions-boot2";
|
||||
/*
|
||||
* Check if we are allocating a GP disk gp0/1/2/3 disk.
|
||||
* In DT we use the JEDEC naming gp1/2/3/4.
|
||||
*/
|
||||
if (!strcmp(subname, "gp0"))
|
||||
node_name = "partitions-gp1";
|
||||
if (!strcmp(subname, "gp1"))
|
||||
node_name = "partitions-gp2";
|
||||
if (!strcmp(subname, "gp2"))
|
||||
node_name = "partitions-gp3";
|
||||
if (!strcmp(subname, "gp3"))
|
||||
node_name = "partitions-gp4";
|
||||
}
|
||||
|
||||
return device_get_named_child_node(mmc_dev, node_name);
|
||||
}
|
||||
|
||||
static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
struct device *parent,
|
||||
sector_t size,
|
||||
@ -2509,6 +2559,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
int area_type,
|
||||
unsigned int part_type)
|
||||
{
|
||||
struct fwnode_handle *disk_fwnode;
|
||||
struct mmc_blk_data *md;
|
||||
int devidx, ret;
|
||||
char cap_str[10];
|
||||
@ -2610,7 +2661,9 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
/* used in ->open, must be set before add_disk: */
|
||||
if (area_type == MMC_BLK_DATA_AREA_MAIN)
|
||||
dev_set_drvdata(&card->dev, md);
|
||||
ret = device_add_disk(md->parent, md->disk, mmc_disk_attr_groups);
|
||||
disk_fwnode = mmc_blk_get_partitions_node(parent, subname);
|
||||
ret = add_disk_fwnode(md->parent, md->disk, mmc_disk_attr_groups,
|
||||
disk_fwnode);
|
||||
if (ret)
|
||||
goto err_put_disk;
|
||||
return md;
|
||||
|
@ -649,7 +649,7 @@ static bool apple_nvme_handle_cq(struct apple_nvme_queue *q, bool force)
|
||||
|
||||
found = apple_nvme_poll_cq(q, &iob);
|
||||
|
||||
if (!rq_list_empty(iob.req_list))
|
||||
if (!rq_list_empty(&iob.req_list))
|
||||
apple_nvme_complete_batch(&iob);
|
||||
|
||||
return found;
|
||||
|
@ -42,6 +42,8 @@ struct nvme_ns_info {
|
||||
bool is_readonly;
|
||||
bool is_ready;
|
||||
bool is_removed;
|
||||
bool is_rotational;
|
||||
bool no_vwc;
|
||||
};
|
||||
|
||||
unsigned int admin_timeout = 60;
|
||||
@ -1639,6 +1641,8 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
|
||||
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
|
||||
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
|
||||
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
|
||||
info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
|
||||
info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
|
||||
}
|
||||
kfree(id);
|
||||
return ret;
|
||||
@ -2185,11 +2189,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
ns->head->ids.csi == NVME_CSI_ZNS)
|
||||
nvme_update_zone_info(ns, &lim, &zi);
|
||||
|
||||
if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
|
||||
if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc)
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
|
||||
else
|
||||
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
|
||||
|
||||
if (info->is_rotational)
|
||||
lim.features |= BLK_FEAT_ROTATIONAL;
|
||||
|
||||
/*
|
||||
* Register a metadata profile for PI, or the plain non-integrity NVMe
|
||||
* metadata masquerading as Type 0 if supported, otherwise reject block
|
||||
@ -3636,6 +3643,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
|
||||
head->ns_id = info->nsid;
|
||||
head->ids = info->ids;
|
||||
head->shared = info->is_shared;
|
||||
head->rotational = info->is_rotational;
|
||||
ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
|
||||
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
|
||||
kref_init(&head->ref);
|
||||
@ -4017,7 +4025,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
{
|
||||
struct nvme_ns_info info = { .nsid = nsid };
|
||||
struct nvme_ns *ns;
|
||||
int ret;
|
||||
int ret = 1;
|
||||
|
||||
if (nvme_identify_ns_descs(ctrl, &info))
|
||||
return;
|
||||
@ -4034,9 +4042,10 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
* set up a namespace. If not fall back to the legacy version.
|
||||
*/
|
||||
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
|
||||
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
|
||||
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) ||
|
||||
ctrl->vs >= NVME_VS(2, 0, 0))
|
||||
ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
|
||||
else
|
||||
if (ret > 0)
|
||||
ret = nvme_ns_info_from_identify(ctrl, &info);
|
||||
|
||||
if (info.is_removed)
|
||||
@ -4895,7 +4904,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
|
||||
srcu_idx = srcu_read_lock(&ctrl->srcu);
|
||||
list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
|
||||
srcu_read_lock_held(&ctrl->srcu))
|
||||
blk_mq_unfreeze_queue(ns->queue);
|
||||
blk_mq_unfreeze_queue_non_owner(ns->queue);
|
||||
srcu_read_unlock(&ctrl->srcu, srcu_idx);
|
||||
clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
|
||||
}
|
||||
@ -4940,7 +4949,12 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
|
||||
srcu_idx = srcu_read_lock(&ctrl->srcu);
|
||||
list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
|
||||
srcu_read_lock_held(&ctrl->srcu))
|
||||
blk_freeze_queue_start(ns->queue);
|
||||
/*
|
||||
* Typical non_owner use case is from pci driver, in which
|
||||
* start_freeze is called from timeout work function, but
|
||||
* unfreeze is done in reset work context
|
||||
*/
|
||||
blk_freeze_queue_start_non_owner(ns->queue);
|
||||
srcu_read_unlock(&ctrl->srcu, srcu_idx);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_start_freeze);
|
||||
@ -5036,6 +5050,8 @@ static inline void _nvme_check_size(void)
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
|
||||
@ -5044,22 +5060,20 @@ static inline void _nvme_check_size(void)
|
||||
|
||||
static int __init nvme_core_init(void)
|
||||
{
|
||||
unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;
|
||||
int result = -ENOMEM;
|
||||
|
||||
_nvme_check_size();
|
||||
|
||||
nvme_wq = alloc_workqueue("nvme-wq",
|
||||
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
|
||||
nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0);
|
||||
if (!nvme_wq)
|
||||
goto out;
|
||||
|
||||
nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
|
||||
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
|
||||
nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0);
|
||||
if (!nvme_reset_wq)
|
||||
goto destroy_wq;
|
||||
|
||||
nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
|
||||
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
|
||||
nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0);
|
||||
if (!nvme_delete_wq)
|
||||
goto destroy_reset_wq;
|
||||
|
||||
|
@ -114,7 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
|
||||
|
||||
static int nvme_map_user_request(struct request *req, u64 ubuffer,
|
||||
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
|
||||
u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
|
||||
struct io_uring_cmd *ioucmd, unsigned int flags)
|
||||
{
|
||||
struct request_queue *q = req->q;
|
||||
struct nvme_ns *ns = q->queuedata;
|
||||
@ -152,8 +152,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
|
||||
bio_set_dev(bio, bdev);
|
||||
|
||||
if (has_metadata) {
|
||||
ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len,
|
||||
meta_seed);
|
||||
ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len);
|
||||
if (ret)
|
||||
goto out_unmap;
|
||||
}
|
||||
@ -170,7 +169,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
|
||||
|
||||
static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
|
||||
void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
|
||||
void __user *meta_buffer, unsigned meta_len,
|
||||
u64 *result, unsigned timeout, unsigned int flags)
|
||||
{
|
||||
struct nvme_ns *ns = q->queuedata;
|
||||
@ -187,7 +186,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
req->timeout = timeout;
|
||||
if (ubuffer && bufflen) {
|
||||
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
|
||||
meta_len, meta_seed, NULL, flags);
|
||||
meta_len, NULL, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -268,7 +267,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
|
||||
c.rw.lbatm = cpu_to_le16(io.appmask);
|
||||
|
||||
return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
|
||||
meta_len, lower_32_bits(io.slba), NULL, 0, 0);
|
||||
meta_len, NULL, 0, 0);
|
||||
}
|
||||
|
||||
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
|
||||
@ -323,7 +322,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
|
||||
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
|
||||
cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
|
||||
cmd.metadata_len, 0, &result, timeout, 0);
|
||||
cmd.metadata_len, &result, timeout, 0);
|
||||
|
||||
if (status >= 0) {
|
||||
if (put_user(result, &ucmd->result))
|
||||
@ -370,7 +369,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
|
||||
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
|
||||
cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
|
||||
cmd.metadata_len, 0, &cmd.result, timeout, flags);
|
||||
cmd.metadata_len, &cmd.result, timeout, flags);
|
||||
|
||||
if (status >= 0) {
|
||||
if (put_user(cmd.result, &ucmd->result))
|
||||
@ -402,7 +401,7 @@ struct nvme_uring_cmd_pdu {
|
||||
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
|
||||
struct io_uring_cmd *ioucmd)
|
||||
{
|
||||
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
|
||||
return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
|
||||
}
|
||||
|
||||
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
|
||||
@ -507,7 +506,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
if (d.addr && d.data_len) {
|
||||
ret = nvme_map_user_request(req, d.addr,
|
||||
d.data_len, nvme_to_user_ptr(d.metadata),
|
||||
d.metadata_len, 0, ioucmd, vec);
|
||||
d.metadata_len, ioucmd, vec);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -635,8 +634,6 @@ static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
|
||||
|
||||
ret = nvme_uring_cmd_checks(issue_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -635,8 +635,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
|
||||
lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
|
||||
if (head->ids.csi == NVME_CSI_ZNS)
|
||||
lim.features |= BLK_FEAT_ZONED;
|
||||
else
|
||||
lim.max_zone_append_sectors = 0;
|
||||
|
||||
head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
|
||||
if (IS_ERR(head->disk))
|
||||
|
@ -474,6 +474,7 @@ struct nvme_ns_head {
|
||||
struct list_head entry;
|
||||
struct kref ref;
|
||||
bool shared;
|
||||
bool rotational;
|
||||
bool passthru_err_log_enabled;
|
||||
struct nvme_effects_log *effects;
|
||||
u64 nuse;
|
||||
|
@ -141,6 +141,7 @@ struct nvme_dev {
|
||||
struct nvme_ctrl ctrl;
|
||||
u32 last_ps;
|
||||
bool hmb;
|
||||
struct sg_table *hmb_sgt;
|
||||
|
||||
mempool_t *iod_mempool;
|
||||
|
||||
@ -153,6 +154,7 @@ struct nvme_dev {
|
||||
/* host memory buffer support: */
|
||||
u64 host_mem_size;
|
||||
u32 nr_host_mem_descs;
|
||||
u32 host_mem_descs_size;
|
||||
dma_addr_t host_mem_descs_dma;
|
||||
struct nvme_host_mem_buf_desc *host_mem_descs;
|
||||
void **host_mem_desc_bufs;
|
||||
@ -902,11 +904,12 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct request **rqlist)
|
||||
static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
|
||||
{
|
||||
struct request *req;
|
||||
|
||||
spin_lock(&nvmeq->sq_lock);
|
||||
while (!rq_list_empty(*rqlist)) {
|
||||
struct request *req = rq_list_pop(rqlist);
|
||||
while ((req = rq_list_pop(rqlist))) {
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
|
||||
nvme_sq_copy_cmd(nvmeq, &iod->cmd);
|
||||
@ -929,33 +932,26 @@ static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
|
||||
return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void nvme_queue_rqs(struct request **rqlist)
|
||||
static void nvme_queue_rqs(struct rq_list *rqlist)
|
||||
{
|
||||
struct request *req, *next, *prev = NULL;
|
||||
struct request *requeue_list = NULL;
|
||||
struct rq_list submit_list = { };
|
||||
struct rq_list requeue_list = { };
|
||||
struct nvme_queue *nvmeq = NULL;
|
||||
struct request *req;
|
||||
|
||||
rq_list_for_each_safe(rqlist, req, next) {
|
||||
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
|
||||
while ((req = rq_list_pop(rqlist))) {
|
||||
if (nvmeq && nvmeq != req->mq_hctx->driver_data)
|
||||
nvme_submit_cmds(nvmeq, &submit_list);
|
||||
nvmeq = req->mq_hctx->driver_data;
|
||||
|
||||
if (!nvme_prep_rq_batch(nvmeq, req)) {
|
||||
/* detach 'req' and add to remainder list */
|
||||
rq_list_move(rqlist, &requeue_list, req, prev);
|
||||
|
||||
req = prev;
|
||||
if (!req)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!next || req->mq_hctx != next->mq_hctx) {
|
||||
/* detach rest of list, and submit */
|
||||
req->rq_next = NULL;
|
||||
nvme_submit_cmds(nvmeq, rqlist);
|
||||
*rqlist = next;
|
||||
prev = NULL;
|
||||
} else
|
||||
prev = req;
|
||||
if (nvme_prep_rq_batch(nvmeq, req))
|
||||
rq_list_add_tail(&submit_list, req);
|
||||
else
|
||||
rq_list_add_tail(&requeue_list, req);
|
||||
}
|
||||
|
||||
if (nvmeq)
|
||||
nvme_submit_cmds(nvmeq, &submit_list);
|
||||
*rqlist = requeue_list;
|
||||
}
|
||||
|
||||
@ -1083,7 +1079,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
|
||||
DEFINE_IO_COMP_BATCH(iob);
|
||||
|
||||
if (nvme_poll_cq(nvmeq, &iob)) {
|
||||
if (!rq_list_empty(iob.req_list))
|
||||
if (!rq_list_empty(&iob.req_list))
|
||||
nvme_pci_complete_batch(&iob);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
@ -1951,7 +1947,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_free_host_mem(struct nvme_dev *dev)
|
||||
static void nvme_free_host_mem_multi(struct nvme_dev *dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -1966,18 +1962,54 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
|
||||
|
||||
kfree(dev->host_mem_desc_bufs);
|
||||
dev->host_mem_desc_bufs = NULL;
|
||||
dma_free_coherent(dev->dev,
|
||||
dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
|
||||
}
|
||||
|
||||
static void nvme_free_host_mem(struct nvme_dev *dev)
|
||||
{
|
||||
if (dev->hmb_sgt)
|
||||
dma_free_noncontiguous(dev->dev, dev->host_mem_size,
|
||||
dev->hmb_sgt, DMA_BIDIRECTIONAL);
|
||||
else
|
||||
nvme_free_host_mem_multi(dev);
|
||||
|
||||
dma_free_coherent(dev->dev, dev->host_mem_descs_size,
|
||||
dev->host_mem_descs, dev->host_mem_descs_dma);
|
||||
dev->host_mem_descs = NULL;
|
||||
dev->host_mem_descs_size = 0;
|
||||
dev->nr_host_mem_descs = 0;
|
||||
}
|
||||
|
||||
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
|
||||
static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size)
|
||||
{
|
||||
dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size,
|
||||
DMA_BIDIRECTIONAL, GFP_KERNEL, 0);
|
||||
if (!dev->hmb_sgt)
|
||||
return -ENOMEM;
|
||||
|
||||
dev->host_mem_descs = dma_alloc_coherent(dev->dev,
|
||||
sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma,
|
||||
GFP_KERNEL);
|
||||
if (!dev->host_mem_descs) {
|
||||
dma_free_noncontiguous(dev->dev, dev->host_mem_size,
|
||||
dev->hmb_sgt, DMA_BIDIRECTIONAL);
|
||||
dev->hmb_sgt = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
dev->host_mem_size = size;
|
||||
dev->host_mem_descs_size = sizeof(*dev->host_mem_descs);
|
||||
dev->nr_host_mem_descs = 1;
|
||||
|
||||
dev->host_mem_descs[0].addr =
|
||||
cpu_to_le64(dev->hmb_sgt->sgl->dma_address);
|
||||
dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
|
||||
u32 chunk_size)
|
||||
{
|
||||
struct nvme_host_mem_buf_desc *descs;
|
||||
u32 max_entries, len;
|
||||
u32 max_entries, len, descs_size;
|
||||
dma_addr_t descs_dma;
|
||||
int i = 0;
|
||||
void **bufs;
|
||||
@ -1990,8 +2022,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
|
||||
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
|
||||
max_entries = dev->ctrl.hmmaxd;
|
||||
|
||||
descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
|
||||
&descs_dma, GFP_KERNEL);
|
||||
descs_size = max_entries * sizeof(*descs);
|
||||
descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma,
|
||||
GFP_KERNEL);
|
||||
if (!descs)
|
||||
goto out;
|
||||
|
||||
@ -2020,6 +2053,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
|
||||
dev->host_mem_size = size;
|
||||
dev->host_mem_descs = descs;
|
||||
dev->host_mem_descs_dma = descs_dma;
|
||||
dev->host_mem_descs_size = descs_size;
|
||||
dev->host_mem_desc_bufs = bufs;
|
||||
return 0;
|
||||
|
||||
@ -2034,8 +2068,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
|
||||
|
||||
kfree(bufs);
|
||||
out_free_descs:
|
||||
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
|
||||
descs_dma);
|
||||
dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
|
||||
out:
|
||||
dev->host_mem_descs = NULL;
|
||||
return -ENOMEM;
|
||||
@ -2047,9 +2080,18 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
|
||||
u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
|
||||
u64 chunk_size;
|
||||
|
||||
/*
|
||||
* If there is an IOMMU that can merge pages, try a virtually
|
||||
* non-contiguous allocation for a single segment first.
|
||||
*/
|
||||
if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) {
|
||||
if (!nvme_alloc_host_mem_single(dev, preferred))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* start big and work our way down */
|
||||
for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
|
||||
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
|
||||
if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) {
|
||||
if (!min || dev->host_mem_size >= min)
|
||||
return 0;
|
||||
nvme_free_host_mem(dev);
|
||||
@@ -2097,8 +2139,10 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
	}

	dev_info(dev->ctrl.device,
		"allocated %lld MiB host memory buffer.\n",
		dev->host_mem_size >> ilog2(SZ_1M));
		"allocated %lld MiB host memory buffer (%u segment%s).\n",
		dev->host_mem_size >> ilog2(SZ_1M),
		dev->nr_host_mem_descs,
		str_plural(dev->nr_host_mem_descs));
	}

	ret = nvme_set_host_mem(dev, enable_bits);
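With the extended format string, the HMB log line now also reports how many descriptors back the buffer, e.g. "allocated 512 MiB host memory buffer (1 segment)." on the single-descriptor path or "... (8 segments)." after the chunked fallback (sizes and counts here are illustrative only).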
@@ -228,27 +228,61 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)

static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
	static const char * const rrega_strs[] = {
		[0x00] = "register",
		[0x01] = "unregister",
		[0x02] = "replace",
	};
	const char *ret = trace_seq_buffer_ptr(p);
	u8 rrega = cdw10[0] & 0x7;
	u8 iekey = (cdw10[0] >> 3) & 0x1;
	u8 ptpl = (cdw10[3] >> 6) & 0x3;
	const char *rrega_str;

	trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
			 rrega, iekey, ptpl);
	if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
		rrega_str = rrega_strs[rrega];
	else
		rrega_str = "reserved";

	trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
			 rrega, rrega_str, iekey, ptpl);
	trace_seq_putc(p, 0);

	return ret;
}
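To make the decoded output concrete, here is a standalone illustration of the same cdw10 unpacking (plain C outside the kernel; the sample dword value is made up):

	#include <stdio.h>

	/* Same layout as decoded above: RREGA in bits 2:0 of byte 0, IEKEY in
	 * bit 3 of byte 0, PTPL in bits 7:6 of byte 3. */
	int main(void)
	{
		static const char * const rrega_strs[] = {
			"register", "unregister", "replace",
		};
		unsigned char cdw10[4] = { 0x09, 0x00, 0x00, 0x40 };	/* sample value */
		unsigned int rrega = cdw10[0] & 0x7;
		unsigned int iekey = (cdw10[0] >> 3) & 0x1;
		unsigned int ptpl = (cdw10[3] >> 6) & 0x3;
		const char *name = rrega < 3 ? rrega_strs[rrega] : "reserved";

		/* Prints: rrega=1:unregister, iekey=1, ptpl=1 */
		printf("rrega=%u:%s, iekey=%u, ptpl=%u\n", rrega, name, iekey, ptpl);
		return 0;
	}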
|
||||
|
||||
static const char * const rtype_strs[] = {
|
||||
[0x00] = "reserved",
|
||||
[0x01] = "write exclusive",
|
||||
[0x02] = "exclusive access",
|
||||
[0x03] = "write exclusive registrants only",
|
||||
[0x04] = "exclusive access registrants only",
|
||||
[0x05] = "write exclusive all registrants",
|
||||
[0x06] = "exclusive access all registrants",
|
||||
};
|
||||
|
||||
static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
static const char * const racqa_strs[] = {
|
||||
[0x00] = "acquire",
|
||||
[0x01] = "preempt",
|
||||
[0x02] = "preempt and abort",
|
||||
};
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 racqa = cdw10[0] & 0x7;
|
||||
u8 iekey = (cdw10[0] >> 3) & 0x1;
|
||||
u8 rtype = cdw10[1];
|
||||
const char *racqa_str = "reserved";
|
||||
const char *rtype_str = "reserved";
|
||||
|
||||
trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
|
||||
racqa, iekey, rtype);
|
||||
if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
|
||||
racqa_str = racqa_strs[racqa];
|
||||
|
||||
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
|
||||
rtype_str = rtype_strs[rtype];
|
||||
|
||||
trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
|
||||
racqa, racqa_str, iekey, rtype, rtype_str);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
@ -256,13 +290,25 @@ static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
|
||||
|
||||
static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
static const char * const rrela_strs[] = {
|
||||
[0x00] = "release",
|
||||
[0x01] = "clear",
|
||||
};
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 rrela = cdw10[0] & 0x7;
|
||||
u8 iekey = (cdw10[0] >> 3) & 0x1;
|
||||
u8 rtype = cdw10[1];
|
||||
const char *rrela_str = "reserved";
|
||||
const char *rtype_str = "reserved";
|
||||
|
||||
trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
|
||||
rrela, iekey, rtype);
|
||||
if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
|
||||
rrela_str = rrela_strs[rrela];
|
||||
|
||||
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
|
||||
rtype_str = rtype_strs[rtype];
|
||||
|
||||
trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
|
||||
rrela, rrela_str, iekey, rtype, rtype_str);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
|
@@ -111,7 +111,7 @@ void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
	lim->features |= BLK_FEAT_ZONED;
	lim->max_open_zones = zi->max_open_zones;
	lim->max_active_zones = zi->max_active_zones;
	lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
	lim->max_hw_zone_append_sectors = ns->ctrl->max_zone_append;
	lim->chunk_sectors = ns->head->zsze =
		nvme_lba_to_sect(ns->head, zi->zone_size);
}
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o

nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
		discovery.o io-cmd-file.o io-cmd-bdev.o
		discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
@ -71,6 +71,35 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
|
||||
nvmet_req_complete(req, 0);
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_supported_log_pages(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_supported_log *logs;
|
||||
u16 status;
|
||||
|
||||
logs = kzalloc(sizeof(*logs), GFP_KERNEL);
|
||||
if (!logs) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
logs->lids[NVME_LOG_SUPPORTED] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_ERROR] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_SMART] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_FW_SLOT] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_CHANGED_NS] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_CMD_EFFECTS] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_ENDURANCE_GROUP] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_ANA] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_FEATURES] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_RMI] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
logs->lids[NVME_LOG_RESERVATION] = cpu_to_le32(NVME_LIDS_LSUPP);
|
||||
|
||||
status = nvmet_copy_to_sgl(req, 0, logs, sizeof(*logs));
|
||||
kfree(logs);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
|
||||
struct nvme_smart_log *slog)
|
||||
{
|
||||
@ -130,6 +159,45 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
|
||||
return NVME_SC_SUCCESS;
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_log_page_rmi(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_rotational_media_log *log;
|
||||
struct gendisk *disk;
|
||||
u16 status;
|
||||
|
||||
req->cmd->common.nsid = cpu_to_le32(le16_to_cpu(
|
||||
req->cmd->get_log_page.lsi));
|
||||
status = nvmet_req_find_ns(req);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
if (!req->ns->bdev || bdev_nonrot(req->ns->bdev)) {
|
||||
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (req->transfer_len != sizeof(*log)) {
|
||||
status = NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
log = kzalloc(sizeof(*log), GFP_KERNEL);
|
||||
if (!log)
|
||||
goto out;
|
||||
|
||||
log->endgid = req->cmd->get_log_page.lsi;
|
||||
disk = req->ns->bdev->bd_disk;
|
||||
if (disk && disk->ia_ranges)
|
||||
log->numa = cpu_to_le16(disk->ia_ranges->nr_ia_ranges);
|
||||
else
|
||||
log->numa = cpu_to_le16(1);
|
||||
|
||||
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
|
||||
kfree(log);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_smart_log *log;
|
||||
@ -176,6 +244,10 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
|
||||
log->iocs[nvme_cmd_read] =
|
||||
log->iocs[nvme_cmd_flush] =
|
||||
log->iocs[nvme_cmd_dsm] =
|
||||
log->iocs[nvme_cmd_resv_acquire] =
|
||||
log->iocs[nvme_cmd_resv_register] =
|
||||
log->iocs[nvme_cmd_resv_release] =
|
||||
log->iocs[nvme_cmd_resv_report] =
|
||||
cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
|
||||
log->iocs[nvme_cmd_write] =
|
||||
log->iocs[nvme_cmd_write_zeroes] =
|
||||
@ -272,6 +344,49 @@ static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
|
||||
return struct_size(desc, nsids, count);
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_log_page_endgrp(struct nvmet_req *req)
|
||||
{
|
||||
u64 host_reads, host_writes, data_units_read, data_units_written;
|
||||
struct nvme_endurance_group_log *log;
|
||||
u16 status;
|
||||
|
||||
/*
|
||||
* The target driver emulates each endurance group as its own
|
||||
* namespace, reusing the nsid as the endurance group identifier.
|
||||
*/
|
||||
req->cmd->common.nsid = cpu_to_le32(le16_to_cpu(
|
||||
req->cmd->get_log_page.lsi));
|
||||
status = nvmet_req_find_ns(req);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
log = kzalloc(sizeof(*log), GFP_KERNEL);
|
||||
if (!log) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!req->ns->bdev)
|
||||
goto copy;
|
||||
|
||||
host_reads = part_stat_read(req->ns->bdev, ios[READ]);
|
||||
data_units_read =
|
||||
DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[READ]), 1000);
|
||||
host_writes = part_stat_read(req->ns->bdev, ios[WRITE]);
|
||||
data_units_written =
|
||||
DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[WRITE]), 1000);
|
||||
|
||||
put_unaligned_le64(host_reads, &log->hrc[0]);
|
||||
put_unaligned_le64(data_units_read, &log->dur[0]);
|
||||
put_unaligned_le64(host_writes, &log->hwc[0]);
|
||||
put_unaligned_le64(data_units_written, &log->duw[0]);
|
||||
copy:
|
||||
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
|
||||
kfree(log);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_ana_rsp_hdr hdr = { 0, };
|
||||
@ -317,12 +432,44 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_log_page_features(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_supported_features_log *features;
|
||||
u16 status;
|
||||
|
||||
features = kzalloc(sizeof(*features), GFP_KERNEL);
|
||||
if (!features) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
features->fis[NVME_FEAT_NUM_QUEUES] =
|
||||
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
|
||||
features->fis[NVME_FEAT_KATO] =
|
||||
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
|
||||
features->fis[NVME_FEAT_ASYNC_EVENT] =
|
||||
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
|
||||
features->fis[NVME_FEAT_HOST_ID] =
|
||||
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
|
||||
features->fis[NVME_FEAT_WRITE_PROTECT] =
|
||||
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE);
|
||||
features->fis[NVME_FEAT_RESV_MASK] =
|
||||
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE);
|
||||
|
||||
status = nvmet_copy_to_sgl(req, 0, features, sizeof(*features));
|
||||
kfree(features);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_get_log_page(struct nvmet_req *req)
|
||||
{
|
||||
if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
|
||||
return;
|
||||
|
||||
switch (req->cmd->get_log_page.lid) {
|
||||
case NVME_LOG_SUPPORTED:
|
||||
return nvmet_execute_get_supported_log_pages(req);
|
||||
case NVME_LOG_ERROR:
|
||||
return nvmet_execute_get_log_page_error(req);
|
||||
case NVME_LOG_SMART:
|
||||
@ -338,8 +485,16 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
|
||||
return nvmet_execute_get_log_changed_ns(req);
|
||||
case NVME_LOG_CMD_EFFECTS:
|
||||
return nvmet_execute_get_log_cmd_effects_ns(req);
|
||||
case NVME_LOG_ENDURANCE_GROUP:
|
||||
return nvmet_execute_get_log_page_endgrp(req);
|
||||
case NVME_LOG_ANA:
|
||||
return nvmet_execute_get_log_page_ana(req);
|
||||
case NVME_LOG_FEATURES:
|
||||
return nvmet_execute_get_log_page_features(req);
|
||||
case NVME_LOG_RMI:
|
||||
return nvmet_execute_get_log_page_rmi(req);
|
||||
case NVME_LOG_RESERVATION:
|
||||
return nvmet_execute_get_log_page_resv(req);
|
||||
}
|
||||
pr_debug("unhandled lid %d on qid %d\n",
|
||||
req->cmd->get_log_page.lid, req->sq->qid);
|
||||
@ -433,7 +588,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
|
||||
id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
|
||||
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
|
||||
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
|
||||
NVME_CTRL_ONCS_WRITE_ZEROES);
|
||||
NVME_CTRL_ONCS_WRITE_ZEROES |
|
||||
NVME_CTRL_ONCS_RESERVATIONS);
|
||||
|
||||
/* XXX: don't report vwc if the underlying device is write through */
|
||||
id->vwc = NVME_CTRL_VWC_PRESENT;
|
||||
@ -467,6 +623,13 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
|
||||
|
||||
id->msdbd = ctrl->ops->msdbd;
|
||||
|
||||
/*
|
||||
* Endurance group identifier is 16 bits, so we can't let namespaces
|
||||
* overflow that since we reuse the nsid
|
||||
*/
|
||||
BUILD_BUG_ON(NVMET_MAX_NAMESPACES > USHRT_MAX);
|
||||
id->endgidmax = cpu_to_le16(NVMET_MAX_NAMESPACES);
|
||||
|
||||
id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
|
||||
id->anatt = 10; /* random value */
|
||||
id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
|
||||
@ -551,6 +714,21 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
|
||||
id->nmic = NVME_NS_NMIC_SHARED;
|
||||
id->anagrpid = cpu_to_le32(req->ns->anagrpid);
|
||||
|
||||
if (req->ns->pr.enable)
|
||||
id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE |
|
||||
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS |
|
||||
NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY |
|
||||
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY |
|
||||
NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS |
|
||||
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS |
|
||||
NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF;
|
||||
|
||||
/*
|
||||
* Since we don't know any better, every namespace is its own endurance
|
||||
* group.
|
||||
*/
|
||||
id->endgid = cpu_to_le16(req->ns->nsid);
|
||||
|
||||
memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));
|
||||
|
||||
id->lbaf[0].ds = req->ns->blksize_shift;
|
||||
@ -576,7 +754,40 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_identify_nslist(struct nvmet_req *req)
|
||||
static void nvmet_execute_identify_endgrp_list(struct nvmet_req *req)
|
||||
{
|
||||
u16 min_endgid = le16_to_cpu(req->cmd->identify.cnssid);
|
||||
static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
struct nvmet_ns *ns;
|
||||
unsigned long idx;
|
||||
__le16 *list;
|
||||
u16 status;
|
||||
int i = 1;
|
||||
|
||||
list = kzalloc(buf_size, GFP_KERNEL);
|
||||
if (!list) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
|
||||
if (ns->nsid <= min_endgid)
|
||||
continue;
|
||||
|
||||
list[i++] = cpu_to_le16(ns->nsid);
|
||||
if (i == buf_size / sizeof(__le16))
|
||||
break;
|
||||
}
|
||||
|
||||
list[0] = cpu_to_le16(i - 1);
|
||||
status = nvmet_copy_to_sgl(req, 0, list, buf_size);
|
||||
kfree(list);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_identify_nslist(struct nvmet_req *req, bool match_css)
|
||||
{
|
||||
static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
@ -606,6 +817,8 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req)
|
||||
xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
|
||||
if (ns->nsid <= min_nsid)
|
||||
continue;
|
||||
if (match_css && req->ns->csi != req->cmd->identify.csi)
|
||||
continue;
|
||||
list[i++] = cpu_to_le32(ns->nsid);
|
||||
if (i == buf_size / sizeof(__le32))
|
||||
break;
|
||||
@ -685,6 +898,56 @@ static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req)
|
||||
nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm)));
|
||||
}
|
||||
|
||||
static void nvme_execute_identify_ns_nvm(struct nvmet_req *req)
|
||||
{
|
||||
u16 status;
|
||||
|
||||
status = nvmet_req_find_ns(req);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
status = nvmet_copy_to_sgl(req, 0, ZERO_PAGE(0),
|
||||
NVME_IDENTIFY_DATA_SIZE);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_id_cs_indep(struct nvmet_req *req)
|
||||
{
|
||||
struct nvme_id_ns_cs_indep *id;
|
||||
u16 status;
|
||||
|
||||
status = nvmet_req_find_ns(req);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
id = kzalloc(sizeof(*id), GFP_KERNEL);
|
||||
if (!id) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
id->nstat = NVME_NSTAT_NRDY;
|
||||
id->anagrpid = cpu_to_le32(req->ns->anagrpid);
|
||||
id->nmic = NVME_NS_NMIC_SHARED;
|
||||
if (req->ns->readonly)
|
||||
id->nsattr |= NVME_NS_ATTR_RO;
|
||||
if (req->ns->bdev && !bdev_nonrot(req->ns->bdev))
|
||||
id->nsfeat |= NVME_NS_ROTATIONAL;
|
||||
/*
|
||||
* We need flush command to flush the file's metadata,
|
||||
* so report supporting vwc if backend is file, even
|
||||
* though buffered_io is disable.
|
||||
*/
|
||||
if (req->ns->bdev && !bdev_write_cache(req->ns->bdev))
|
||||
id->nsfeat |= NVME_NS_VWC_NOT_PRESENT;
|
||||
|
||||
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
|
||||
kfree(id);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
static void nvmet_execute_identify(struct nvmet_req *req)
|
||||
{
|
||||
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
|
||||
@ -698,7 +961,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
|
||||
nvmet_execute_identify_ctrl(req);
|
||||
return;
|
||||
case NVME_ID_CNS_NS_ACTIVE_LIST:
|
||||
nvmet_execute_identify_nslist(req);
|
||||
nvmet_execute_identify_nslist(req, false);
|
||||
return;
|
||||
case NVME_ID_CNS_NS_DESC_LIST:
|
||||
nvmet_execute_identify_desclist(req);
|
||||
@ -706,8 +969,8 @@ static void nvmet_execute_identify(struct nvmet_req *req)
|
||||
case NVME_ID_CNS_CS_NS:
|
||||
switch (req->cmd->identify.csi) {
|
||||
case NVME_CSI_NVM:
|
||||
/* Not supported */
|
||||
break;
|
||||
nvme_execute_identify_ns_nvm(req);
|
||||
return;
|
||||
case NVME_CSI_ZNS:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
nvmet_execute_identify_ns_zns(req);
|
||||
@ -729,6 +992,15 @@ static void nvmet_execute_identify(struct nvmet_req *req)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NVME_ID_CNS_NS_ACTIVE_LIST_CS:
|
||||
nvmet_execute_identify_nslist(req, true);
|
||||
return;
|
||||
case NVME_ID_CNS_NS_CS_INDEP:
|
||||
nvmet_execute_id_cs_indep(req);
|
||||
return;
|
||||
case NVME_ID_CNS_ENDGRP_LIST:
|
||||
nvmet_execute_identify_endgrp_list(req);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_debug("unhandled identify cns %d on qid %d\n",
|
||||
@ -861,6 +1133,9 @@ void nvmet_execute_set_features(struct nvmet_req *req)
|
||||
case NVME_FEAT_WRITE_PROTECT:
|
||||
status = nvmet_set_feat_write_protect(req);
|
||||
break;
|
||||
case NVME_FEAT_RESV_MASK:
|
||||
status = nvmet_set_feat_resv_notif_mask(req, cdw11);
|
||||
break;
|
||||
default:
|
||||
req->error_loc = offsetof(struct nvme_common_command, cdw10);
|
||||
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
|
||||
@ -959,6 +1234,9 @@ void nvmet_execute_get_features(struct nvmet_req *req)
|
||||
case NVME_FEAT_WRITE_PROTECT:
|
||||
status = nvmet_get_feat_write_protect(req);
|
||||
break;
|
||||
case NVME_FEAT_RESV_MASK:
|
||||
status = nvmet_get_feat_resv_notif_mask(req);
|
||||
break;
|
||||
default:
|
||||
req->error_loc =
|
||||
offsetof(struct nvme_common_command, cdw10);
|
||||
|
@@ -769,6 +769,32 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,

CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);

static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page)
{
	return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable);
}

static ssize_t nvmet_ns_resv_enable_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);
	bool val;

	if (kstrtobool(page, &val))
		return -EINVAL;

	mutex_lock(&ns->subsys->lock);
	if (ns->enabled) {
		pr_err("the ns:%d is already enabled.\n", ns->nsid);
		mutex_unlock(&ns->subsys->lock);
		return -EINVAL;
	}
	ns->pr.enable = val;
	mutex_unlock(&ns->subsys->lock);
	return count;
}
CONFIGFS_ATTR(nvmet_ns_, resv_enable);
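Usage note (hedged, path assumed from the usual nvmet configfs layout): reservations are switched on per namespace before the namespace itself is enabled, e.g. by writing 1 to /sys/kernel/config/nvmet/subsystems/<subsysnqn>/namespaces/<nsid>/resv_enable; as the store handler above shows, the write is rejected with -EINVAL once the namespace is already enabled.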
|
||||
|
||||
static struct configfs_attribute *nvmet_ns_attrs[] = {
|
||||
&nvmet_ns_attr_device_path,
|
||||
&nvmet_ns_attr_device_nguid,
|
||||
@ -777,6 +803,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
|
||||
&nvmet_ns_attr_enable,
|
||||
&nvmet_ns_attr_buffered_io,
|
||||
&nvmet_ns_attr_revalidate_size,
|
||||
&nvmet_ns_attr_resv_enable,
|
||||
#ifdef CONFIG_PCI_P2PDMA
|
||||
&nvmet_ns_attr_p2pmem,
|
||||
#endif
|
||||
|
@ -611,6 +611,12 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
|
||||
if (ret)
|
||||
goto out_restore_subsys_maxnsid;
|
||||
|
||||
if (ns->pr.enable) {
|
||||
ret = nvmet_pr_init_ns(ns);
|
||||
if (ret)
|
||||
goto out_remove_from_subsys;
|
||||
}
|
||||
|
||||
subsys->nr_namespaces++;
|
||||
|
||||
nvmet_ns_changed(subsys, ns->nsid);
|
||||
@ -620,6 +626,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
|
||||
mutex_unlock(&subsys->lock);
|
||||
return ret;
|
||||
|
||||
out_remove_from_subsys:
|
||||
xa_erase(&subsys->namespaces, ns->nsid);
|
||||
out_restore_subsys_maxnsid:
|
||||
subsys->max_nsid = nvmet_max_nsid(subsys);
|
||||
percpu_ref_exit(&ns->ref);
|
||||
@ -663,6 +671,9 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
|
||||
wait_for_completion(&ns->disable_done);
|
||||
percpu_ref_exit(&ns->ref);
|
||||
|
||||
if (ns->pr.enable)
|
||||
nvmet_pr_exit_ns(ns);
|
||||
|
||||
mutex_lock(&subsys->lock);
|
||||
|
||||
subsys->nr_namespaces--;
|
||||
@ -754,6 +765,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
|
||||
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
|
||||
{
|
||||
struct nvmet_ns *ns = req->ns;
|
||||
struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;
|
||||
|
||||
if (!req->sq->sqhd_disabled)
|
||||
nvmet_update_sq_head(req);
|
||||
@ -766,6 +778,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
|
||||
trace_nvmet_req_complete(req);
|
||||
|
||||
req->ops->queue_response(req);
|
||||
|
||||
if (pc_ref)
|
||||
nvmet_pr_put_ns_pc_ref(pc_ref);
|
||||
if (ns)
|
||||
nvmet_put_namespace(ns);
|
||||
}
|
||||
@ -929,18 +944,39 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (req->ns->pr.enable) {
|
||||
ret = nvmet_parse_pr_cmd(req);
|
||||
if (!ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
switch (req->ns->csi) {
|
||||
case NVME_CSI_NVM:
|
||||
if (req->ns->file)
|
||||
return nvmet_file_parse_io_cmd(req);
|
||||
return nvmet_bdev_parse_io_cmd(req);
|
||||
ret = nvmet_file_parse_io_cmd(req);
|
||||
else
|
||||
ret = nvmet_bdev_parse_io_cmd(req);
|
||||
break;
|
||||
case NVME_CSI_ZNS:
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
|
||||
return nvmet_bdev_zns_parse_io_cmd(req);
|
||||
return NVME_SC_INVALID_IO_CMD_SET;
|
||||
ret = nvmet_bdev_zns_parse_io_cmd(req);
|
||||
else
|
||||
ret = NVME_SC_INVALID_IO_CMD_SET;
|
||||
break;
|
||||
default:
|
||||
return NVME_SC_INVALID_IO_CMD_SET;
|
||||
ret = NVME_SC_INVALID_IO_CMD_SET;
|
||||
}
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (req->ns->pr.enable) {
|
||||
ret = nvmet_pr_check_cmd_access(req);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = nvmet_pr_get_ns_pc_ref(req);
|
||||
}
|
||||
return ret;
|
||||
}
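Reading of the reordered flow above (no behaviour beyond the diff is claimed): reservation commands are recognised and dispatched via nvmet_parse_pr_cmd() first; ordinary I/O is then parsed per command set; and only when that parse succeeds are nvmet_pr_check_cmd_access() and the per-controller pc_ref taken, so the reference is held exactly for commands that will actually execute against the namespace.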
|
||||
|
||||
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
|
||||
@ -964,6 +1000,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
|
||||
req->ns = NULL;
|
||||
req->error_loc = NVMET_NO_ERROR_LOC;
|
||||
req->error_slba = 0;
|
||||
req->pc_ref = NULL;
|
||||
|
||||
/* no support for fused commands yet */
|
||||
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
|
||||
@ -1015,6 +1052,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init);
|
||||
void nvmet_req_uninit(struct nvmet_req *req)
|
||||
{
|
||||
percpu_ref_put(&req->sq->ref);
|
||||
if (req->pc_ref)
|
||||
nvmet_pr_put_ns_pc_ref(req->pc_ref);
|
||||
if (req->ns)
|
||||
nvmet_put_namespace(req->ns);
|
||||
}
|
||||
@ -1383,7 +1422,8 @@ static void nvmet_fatal_error_handler(struct work_struct *work)
|
||||
}
|
||||
|
||||
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
|
||||
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
|
||||
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
|
||||
uuid_t *hostid)
|
||||
{
|
||||
struct nvmet_subsys *subsys;
|
||||
struct nvmet_ctrl *ctrl;
|
||||
@ -1462,6 +1502,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
|
||||
}
|
||||
ctrl->cntlid = ret;
|
||||
|
||||
uuid_copy(&ctrl->hostid, hostid);
|
||||
|
||||
/*
|
||||
* Discovery controllers may use some arbitrary high value
|
||||
* in order to cleanup stale discovery sessions
|
||||
@ -1478,6 +1520,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
|
||||
nvmet_start_keep_alive_timer(ctrl);
|
||||
|
||||
mutex_lock(&subsys->lock);
|
||||
ret = nvmet_ctrl_init_pr(ctrl);
|
||||
if (ret)
|
||||
goto init_pr_fail;
|
||||
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
|
||||
nvmet_setup_p2p_ns_map(ctrl, req);
|
||||
nvmet_debugfs_ctrl_setup(ctrl);
|
||||
@ -1486,6 +1531,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
|
||||
*ctrlp = ctrl;
|
||||
return 0;
|
||||
|
||||
init_pr_fail:
|
||||
mutex_unlock(&subsys->lock);
|
||||
nvmet_stop_keep_alive_timer(ctrl);
|
||||
ida_free(&cntlid_ida, ctrl->cntlid);
|
||||
out_free_sqs:
|
||||
kfree(ctrl->sqs);
|
||||
out_free_changed_ns_list:
|
||||
@ -1504,6 +1553,7 @@ static void nvmet_ctrl_free(struct kref *ref)
|
||||
struct nvmet_subsys *subsys = ctrl->subsys;
|
||||
|
||||
mutex_lock(&subsys->lock);
|
||||
nvmet_ctrl_destroy_pr(ctrl);
|
||||
nvmet_release_p2p_ns_map(ctrl);
|
||||
list_del(&ctrl->subsys_entry);
|
||||
mutex_unlock(&subsys->lock);
|
||||
@ -1717,7 +1767,7 @@ static int __init nvmet_init(void)
|
||||
goto out_free_zbd_work_queue;
|
||||
|
||||
nvmet_wq = alloc_workqueue("nvmet-wq",
|
||||
WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
|
||||
WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
|
||||
if (!nvmet_wq)
|
||||
goto out_free_buffered_work_queue;
|
||||
|
||||
|
@ -64,6 +64,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
|
||||
case NVME_REG_CSTS:
|
||||
val = ctrl->csts;
|
||||
break;
|
||||
case NVME_REG_CRTO:
|
||||
val = NVME_CAP_TIMEOUT(ctrl->csts);
|
||||
break;
|
||||
default:
|
||||
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
|
||||
break;
|
||||
@ -245,12 +248,10 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
|
||||
d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
|
||||
d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
|
||||
status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
|
||||
le32_to_cpu(c->kato), &ctrl);
|
||||
le32_to_cpu(c->kato), &ctrl, &d->hostid);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
uuid_copy(&ctrl->hostid, &d->hostid);
|
||||
|
||||
dhchap_status = nvmet_setup_auth(ctrl);
|
||||
if (dhchap_status) {
|
||||
pr_err("Failed to setup authentication, dhchap status %u\n",
|
||||
|
@ -20,8 +20,9 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/t10-pi.h>
|
||||
#include <linux/kfifo.h>
|
||||
|
||||
#define NVMET_DEFAULT_VS NVME_VS(1, 3, 0)
|
||||
#define NVMET_DEFAULT_VS NVME_VS(2, 1, 0)
|
||||
|
||||
#define NVMET_ASYNC_EVENTS 4
|
||||
#define NVMET_ERROR_LOG_SLOTS 128
|
||||
@ -30,6 +31,7 @@
|
||||
#define NVMET_MN_MAX_SIZE 40
|
||||
#define NVMET_SN_MAX_SIZE 20
|
||||
#define NVMET_FR_MAX_SIZE 8
|
||||
#define NVMET_PR_LOG_QUEUE_SIZE 64
|
||||
|
||||
/*
|
||||
* Supported optional AENs:
|
||||
@ -56,6 +58,38 @@
|
||||
#define IPO_IATTR_CONNECT_SQE(x) \
|
||||
(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
|
||||
|
||||
struct nvmet_pr_registrant {
|
||||
u64 rkey;
|
||||
uuid_t hostid;
|
||||
enum nvme_pr_type rtype;
|
||||
struct list_head entry;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
struct nvmet_pr {
|
||||
bool enable;
|
||||
unsigned long notify_mask;
|
||||
atomic_t generation;
|
||||
struct nvmet_pr_registrant __rcu *holder;
|
||||
/*
|
||||
* During the execution of the reservation command, mutual
|
||||
* exclusion is required throughout the process. However,
|
||||
* while waiting asynchronously for the 'per controller
|
||||
* percpu_ref' to complete before the 'preempt and abort'
|
||||
* command finishes, a semaphore is needed to ensure mutual
|
||||
* exclusion instead of a mutex.
|
||||
*/
|
||||
struct semaphore pr_sem;
|
||||
struct list_head registrant_list;
|
||||
};
|
||||
|
||||
struct nvmet_pr_per_ctrl_ref {
|
||||
struct percpu_ref ref;
|
||||
struct completion free_done;
|
||||
struct completion confirm_done;
|
||||
uuid_t hostid;
|
||||
};
|
||||
|
||||
struct nvmet_ns {
|
||||
struct percpu_ref ref;
|
||||
struct file *bdev_file;
|
||||
@ -85,6 +119,8 @@ struct nvmet_ns {
|
||||
int pi_type;
|
||||
int metadata_size;
|
||||
u8 csi;
|
||||
struct nvmet_pr pr;
|
||||
struct xarray pr_per_ctrl_refs;
|
||||
};
|
||||
|
||||
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
|
||||
@ -191,6 +227,13 @@ static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port)
|
||||
return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED;
|
||||
}
|
||||
|
||||
struct nvmet_pr_log_mgr {
|
||||
struct mutex lock;
|
||||
u64 lost_count;
|
||||
u64 counter;
|
||||
DECLARE_KFIFO(log_queue, struct nvme_pr_log, NVMET_PR_LOG_QUEUE_SIZE);
|
||||
};
|
||||
|
||||
struct nvmet_ctrl {
|
||||
struct nvmet_subsys *subsys;
|
||||
struct nvmet_sq **sqs;
|
||||
@ -246,6 +289,7 @@ struct nvmet_ctrl {
|
||||
u8 *dh_key;
|
||||
size_t dh_keysize;
|
||||
#endif
|
||||
struct nvmet_pr_log_mgr pr_log_mgr;
|
||||
};
|
||||
|
||||
struct nvmet_subsys {
|
||||
@ -396,6 +440,9 @@ struct nvmet_req {
|
||||
struct work_struct zmgmt_work;
|
||||
} z;
|
||||
#endif /* CONFIG_BLK_DEV_ZONED */
|
||||
struct {
|
||||
struct work_struct abort_work;
|
||||
} r;
|
||||
};
|
||||
int sg_cnt;
|
||||
int metadata_sg_cnt;
|
||||
@ -412,6 +459,7 @@ struct nvmet_req {
|
||||
struct device *p2p_client;
|
||||
u16 error_loc;
|
||||
u64 error_slba;
|
||||
struct nvmet_pr_per_ctrl_ref *pc_ref;
|
||||
};
|
||||
|
||||
#define NVMET_MAX_MPOOL_BVEC 16
|
||||
@ -498,7 +546,8 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
|
||||
|
||||
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
|
||||
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
|
||||
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
|
||||
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
|
||||
uuid_t *hostid);
|
||||
struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
|
||||
const char *hostnqn, u16 cntlid,
|
||||
struct nvmet_req *req);
|
||||
@ -761,4 +810,18 @@ static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
|
||||
static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; }
|
||||
#endif
|
||||
|
||||
int nvmet_pr_init_ns(struct nvmet_ns *ns);
|
||||
u16 nvmet_parse_pr_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_pr_check_cmd_access(struct nvmet_req *req);
|
||||
int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl);
|
||||
void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl);
|
||||
void nvmet_pr_exit_ns(struct nvmet_ns *ns);
|
||||
void nvmet_execute_get_log_page_resv(struct nvmet_req *req);
|
||||
u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask);
|
||||
u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req);
|
||||
u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req);
|
||||
static inline void nvmet_pr_put_ns_pc_ref(struct nvmet_pr_per_ctrl_ref *pc_ref)
|
||||
{
|
||||
percpu_ref_put(&pc_ref->ref);
|
||||
}
|
||||
#endif /* _NVMET_H */
|
||||
|
drivers/nvme/target/pr.c: new file, 1156 lines (diff suppressed because it is too large).
@ -180,6 +180,106 @@ static const char *nvmet_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvmet_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
static const char * const rrega_strs[] = {
|
||||
[0x00] = "register",
|
||||
[0x01] = "unregister",
|
||||
[0x02] = "replace",
|
||||
};
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 rrega = cdw10[0] & 0x7;
|
||||
u8 iekey = (cdw10[0] >> 3) & 0x1;
|
||||
u8 ptpl = (cdw10[3] >> 6) & 0x3;
|
||||
const char *rrega_str;
|
||||
|
||||
if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
|
||||
rrega_str = rrega_strs[rrega];
|
||||
else
|
||||
rrega_str = "reserved";
|
||||
|
||||
trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
|
||||
rrega, rrega_str, iekey, ptpl);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char * const rtype_strs[] = {
|
||||
[0x00] = "reserved",
|
||||
[0x01] = "write exclusive",
|
||||
[0x02] = "exclusive access",
|
||||
[0x03] = "write exclusive registrants only",
|
||||
[0x04] = "exclusive access registrants only",
|
||||
[0x05] = "write exclusive all registrants",
|
||||
[0x06] = "exclusive access all registrants",
|
||||
};
|
||||
|
||||
static const char *nvmet_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
static const char * const racqa_strs[] = {
|
||||
[0x00] = "acquire",
|
||||
[0x01] = "preempt",
|
||||
[0x02] = "preempt and abort",
|
||||
};
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 racqa = cdw10[0] & 0x7;
|
||||
u8 iekey = (cdw10[0] >> 3) & 0x1;
|
||||
u8 rtype = cdw10[1];
|
||||
const char *racqa_str = "reserved";
|
||||
const char *rtype_str = "reserved";
|
||||
|
||||
if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
|
||||
racqa_str = racqa_strs[racqa];
|
||||
|
||||
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
|
||||
rtype_str = rtype_strs[rtype];
|
||||
|
||||
trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
|
||||
racqa, racqa_str, iekey, rtype, rtype_str);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvmet_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
static const char * const rrela_strs[] = {
|
||||
[0x00] = "release",
|
||||
[0x01] = "clear",
|
||||
};
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 rrela = cdw10[0] & 0x7;
|
||||
u8 iekey = (cdw10[0] >> 3) & 0x1;
|
||||
u8 rtype = cdw10[1];
|
||||
const char *rrela_str = "reserved";
|
||||
const char *rtype_str = "reserved";
|
||||
|
||||
if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
|
||||
rrela_str = rrela_strs[rrela];
|
||||
|
||||
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
|
||||
rtype_str = rtype_strs[rtype];
|
||||
|
||||
trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
|
||||
rrela, rrela_str, iekey, rtype, rtype_str);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvmet_trace_resv_report(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u32 numd = get_unaligned_le32(cdw10);
|
||||
u8 eds = cdw10[4] & 0x1;
|
||||
|
||||
trace_seq_printf(p, "numd=%u, eds=%u", numd, eds);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
|
||||
u8 opcode, u8 *cdw10)
|
||||
{
|
||||
@ -195,6 +295,14 @@ const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
|
||||
return nvmet_trace_zone_mgmt_send(p, cdw10);
|
||||
case nvme_cmd_zone_mgmt_recv:
|
||||
return nvmet_trace_zone_mgmt_recv(p, cdw10);
|
||||
case nvme_cmd_resv_register:
|
||||
return nvmet_trace_resv_reg(p, cdw10);
|
||||
case nvme_cmd_resv_acquire:
|
||||
return nvmet_trace_resv_acq(p, cdw10);
|
||||
case nvme_cmd_resv_release:
|
||||
return nvmet_trace_resv_rel(p, cdw10);
|
||||
case nvme_cmd_resv_report:
|
||||
return nvmet_trace_resv_report(p, cdw10);
|
||||
default:
|
||||
return nvmet_trace_common(p, cdw10);
|
||||
}
|
||||
|
@ -537,6 +537,7 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
|
||||
u16 status = NVME_SC_SUCCESS;
|
||||
unsigned int total_len = 0;
|
||||
struct scatterlist *sg;
|
||||
u32 data_len = nvmet_rw_data_len(req);
|
||||
struct bio *bio;
|
||||
int sg_cnt;
|
||||
|
||||
@ -544,6 +545,13 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
|
||||
if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
|
||||
return;
|
||||
|
||||
if (data_len >
|
||||
bdev_max_zone_append_sectors(req->ns->bdev) << SECTOR_SHIFT) {
|
||||
req->error_loc = offsetof(struct nvme_rw_command, length);
|
||||
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!req->sg_cnt) {
|
||||
nvmet_req_complete(req, 0);
|
||||
return;
|
||||
@ -576,20 +584,17 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
|
||||
bio->bi_opf |= REQ_FUA;
|
||||
|
||||
for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
|
||||
struct page *p = sg_page(sg);
|
||||
unsigned int l = sg->length;
|
||||
unsigned int o = sg->offset;
|
||||
unsigned int ret;
|
||||
unsigned int len = sg->length;
|
||||
|
||||
ret = bio_add_zone_append_page(bio, p, l, o);
|
||||
if (ret != sg->length) {
|
||||
if (bio_add_pc_page(bdev_get_queue(bio->bi_bdev), bio,
|
||||
sg_page(sg), len, sg->offset) != len) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out_put_bio;
|
||||
}
|
||||
total_len += sg->length;
|
||||
total_len += len;
|
||||
}
|
||||
|
||||
if (total_len != nvmet_rw_data_len(req)) {
|
||||
if (total_len != data_len) {
|
||||
status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
|
||||
goto out_put_bio;
|
||||
}
|
||||
|
@@ -2117,7 +2117,7 @@ int dasd_flush_device_queue(struct dasd_device *device)
	case DASD_CQR_IN_IO:
		rc = device->discipline->term_IO(cqr);
		if (rc) {
			/* unable to terminate requeust */
			/* unable to terminate request */
			dev_err(&device->cdev->dev,
				"Flushing the DASD request queue failed\n");
			/* stop flush processing */
@@ -855,7 +855,7 @@ dasd_delete_device(struct dasd_device *device)
	dev_set_drvdata(&device->cdev->dev, NULL);
	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);

	/* Removve copy relation */
	/* Remove copy relation */
	dasd_devmap_delete_copy_relation_device(device);
	/*
	 * Drop ref_count by 3, one for the devmap reference, one for
@@ -2405,7 +2405,7 @@ static int dasd_eckd_end_analysis(struct dasd_block *block)
	}

	if (count_area != NULL && count_area->kl == 0) {
		/* we found notthing violating our disk layout */
		/* we found nothing violating our disk layout */
		if (dasd_check_blocksize(count_area->dl) == 0)
			block->bp_block = count_area->dl;
	}
@@ -350,6 +350,7 @@ dasd_proc_init(void)
	remove_proc_entry("devices", dasd_proc_root_entry);
out_nodevices:
	remove_proc_entry("dasd", NULL);
	dasd_proc_root_entry = NULL;
out_nodasd:
	return -ENOENT;
}
@@ -357,7 +358,11 @@ dasd_proc_init(void)
void
dasd_proc_exit(void)
{
	if (!dasd_proc_root_entry)
		return;

	remove_proc_entry("devices", dasd_proc_root_entry);
	remove_proc_entry("statistics", dasd_proc_root_entry);
	remove_proc_entry("dasd", NULL);
	dasd_proc_root_entry = NULL;
}
@@ -1190,8 +1190,8 @@ static u8 sd_group_number(struct scsi_cmnd *cmd)
	if (!sdkp->rscs)
		return 0;

	return min3((u32)rq->write_hint, (u32)sdkp->permanent_stream_count,
			0x3fu);
	return min3((u32)rq->bio->bi_write_hint,
			(u32)sdkp->permanent_stream_count, 0x3fu);
}

static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
@@ -1389,7 +1389,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
		ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
				protect | fua, dld);
	} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
		sdp->use_10_for_rw || protect || rq->write_hint) {
		sdp->use_10_for_rw || protect || rq->bio->bi_write_hint) {
		ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
				protect | fua);
	} else {
@@ -633,8 +633,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
	lim->max_open_zones = sdkp->zones_max_open;
	lim->max_active_zones = 0;
	lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);
	/* Enable block layer zone append emulation */
	lim->max_zone_append_sectors = 0;

	return 0;
@@ -707,11 +707,14 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
		 * zoned mode. In this case, we don't have a valid max zone
		 * append size.
		 */
		if (bdev_is_zoned(device->bdev)) {
			blk_stack_limits(lim,
					&bdev_get_queue(device->bdev)->limits,
					0);
		}
		if (bdev_is_zoned(device->bdev))
			blk_stack_limits(lim, bdev_limits(device->bdev), 0);
	}

	ret = blk_validate_limits(lim);
	if (ret) {
		btrfs_err(fs_info, "zoned: failed to validate queue limits");
		return ret;
	}

	/*
@ -72,7 +72,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
|
||||
unsigned int nr);
|
||||
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
|
||||
unsigned int offset);
|
||||
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
|
||||
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len);
|
||||
void bio_integrity_unmap_user(struct bio *bio);
|
||||
bool bio_integrity_prep(struct bio *bio);
|
||||
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
|
||||
@ -99,7 +99,7 @@ static inline void bioset_integrity_free(struct bio_set *bs)
|
||||
}
|
||||
|
||||
static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
|
||||
ssize_t len, u32 seed)
|
||||
ssize_t len)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -418,8 +418,6 @@ bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
|
||||
size_t len, size_t off);
|
||||
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
|
||||
unsigned int, unsigned int);
|
||||
int bio_add_zone_append_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int offset);
|
||||
void __bio_add_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int off);
|
||||
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
|
||||
@ -677,6 +675,23 @@ static inline void bio_clear_polled(struct bio *bio)
|
||||
bio->bi_opf &= ~REQ_POLLED;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_is_zone_append - is this a zone append bio?
|
||||
* @bio: bio to check
|
||||
*
|
||||
* Check if @bio is a zone append operation. Core block layer code and end_io
|
||||
* handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check
|
||||
* because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if
|
||||
* it is not natively supported.
|
||||
*/
|
||||
static inline bool bio_is_zone_append(struct bio *bio)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
|
||||
return false;
|
||||
return bio_op(bio) == REQ_OP_ZONE_APPEND ||
|
||||
bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
|
||||
}
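A hedged sketch of the rule stated in the comment above, using a hypothetical completion handler (nothing below is from this patch):

	/* Hypothetical end_io callback: the written sector of an append has to
	 * be detected via bio_is_zone_append(), because an emulated append
	 * completes with REQ_OP_WRITE as its opcode. */
	static void my_zone_append_end_io(struct bio *bio)
	{
		if (bio_is_zone_append(bio)) {
			sector_t written = bio->bi_iter.bi_sector; /* result location */

			pr_debug("zone append landed at sector %llu\n",
				 (unsigned long long)written);
		}
		bio_put(bio);
	}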
|
||||
|
||||
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
|
||||
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
|
||||
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
|
||||
|
@ -28,7 +28,7 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
|
||||
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
|
||||
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
|
||||
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
|
||||
ssize_t bytes, u32 seed);
|
||||
ssize_t bytes);
|
||||
|
||||
static inline bool
|
||||
blk_integrity_queue_supports_integrity(struct request_queue *q)
|
||||
@ -104,8 +104,7 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
|
||||
}
|
||||
static inline int blk_rq_integrity_map_user(struct request *rq,
|
||||
void __user *ubuf,
|
||||
ssize_t bytes,
|
||||
u32 seed)
|
||||
ssize_t bytes)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -156,9 +156,6 @@ struct request {
|
||||
struct blk_crypto_keyslot *crypt_keyslot;
|
||||
#endif
|
||||
|
||||
enum rw_hint write_hint;
|
||||
unsigned short ioprio;
|
||||
|
||||
enum mq_rq_state state;
|
||||
atomic_t ref;
|
||||
|
||||
@ -222,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
|
||||
|
||||
static inline unsigned short req_get_ioprio(struct request *req)
|
||||
{
|
||||
return req->ioprio;
|
||||
if (req->bio)
|
||||
return req->bio->bi_ioprio;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
|
||||
@ -230,62 +229,61 @@ static inline unsigned short req_get_ioprio(struct request *req)
|
||||
#define rq_dma_dir(rq) \
|
||||
(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
|
||||
|
||||
#define rq_list_add(listptr, rq) do { \
|
||||
(rq)->rq_next = *(listptr); \
|
||||
*(listptr) = rq; \
|
||||
} while (0)
|
||||
|
||||
#define rq_list_add_tail(lastpptr, rq) do { \
|
||||
(rq)->rq_next = NULL; \
|
||||
**(lastpptr) = rq; \
|
||||
*(lastpptr) = &rq->rq_next; \
|
||||
} while (0)
|
||||
|
||||
#define rq_list_pop(listptr) \
|
||||
({ \
|
||||
struct request *__req = NULL; \
|
||||
if ((listptr) && *(listptr)) { \
|
||||
__req = *(listptr); \
|
||||
*(listptr) = __req->rq_next; \
|
||||
} \
|
||||
__req; \
|
||||
})
|
||||
|
||||
#define rq_list_peek(listptr) \
|
||||
({ \
|
||||
struct request *__req = NULL; \
|
||||
if ((listptr) && *(listptr)) \
|
||||
__req = *(listptr); \
|
||||
__req; \
|
||||
})
|
||||
|
||||
#define rq_list_for_each(listptr, pos) \
|
||||
for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
|
||||
|
||||
#define rq_list_for_each_safe(listptr, pos, nxt) \
|
||||
for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \
|
||||
pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL)
|
||||
|
||||
#define rq_list_next(rq) (rq)->rq_next
|
||||
#define rq_list_empty(list) ((list) == (struct request *) NULL)
|
||||
|
||||
/**
|
||||
* rq_list_move() - move a struct request from one list to another
|
||||
* @src: The source list @rq is currently in
|
||||
* @dst: The destination list that @rq will be appended to
|
||||
* @rq: The request to move
|
||||
* @prev: The request preceding @rq in @src (NULL if @rq is the head)
|
||||
*/
|
||||
static inline void rq_list_move(struct request **src, struct request **dst,
|
||||
struct request *rq, struct request *prev)
|
||||
static inline int rq_list_empty(const struct rq_list *rl)
|
||||
{
|
||||
if (prev)
|
||||
prev->rq_next = rq->rq_next;
|
||||
else
|
||||
*src = rq->rq_next;
|
||||
rq_list_add(dst, rq);
|
||||
return rl->head == NULL;
|
||||
}
|
||||
|
||||
static inline void rq_list_init(struct rq_list *rl)
|
||||
{
|
||||
rl->head = NULL;
|
||||
rl->tail = NULL;
|
||||
}
|
||||
|
||||
static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq)
|
||||
{
|
||||
rq->rq_next = NULL;
|
||||
if (rl->tail)
|
||||
rl->tail->rq_next = rq;
|
||||
else
|
||||
rl->head = rq;
|
||||
rl->tail = rq;
|
||||
}
|
||||
|
||||
static inline void rq_list_add_head(struct rq_list *rl, struct request *rq)
|
||||
{
|
||||
rq->rq_next = rl->head;
|
||||
rl->head = rq;
|
||||
if (!rl->tail)
|
||||
rl->tail = rq;
|
||||
}
|
||||
|
||||
static inline struct request *rq_list_pop(struct rq_list *rl)
|
||||
{
|
||||
struct request *rq = rl->head;
|
||||
|
||||
if (rq) {
|
||||
rl->head = rl->head->rq_next;
|
||||
if (!rl->head)
|
||||
rl->tail = NULL;
|
||||
rq->rq_next = NULL;
|
||||
}
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
static inline struct request *rq_list_peek(struct rq_list *rl)
|
||||
{
|
||||
return rl->head;
|
||||
}
|
||||
|
||||
#define rq_list_for_each(rl, pos) \
|
||||
for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next)
|
||||
|
||||
#define rq_list_for_each_safe(rl, pos, nxt) \
|
||||
for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \
|
||||
pos; pos = nxt, nxt = pos ? pos->rq_next : NULL)
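A hedged usage sketch for the new rq_list type in a driver's ->queue_rqs() hook (my_queue_one() is a made-up per-request issue helper); tail insertion is what keeps submission order, which is the point of the companion blk_add_rq_to_plug/blk_mq_add_to_batch changes:

	static int my_queue_one(struct request *rq);	/* hypothetical: 0 on success */

	static void my_queue_rqs(struct rq_list *rqlist)
	{
		struct rq_list requeue = {};
		struct request *rq;

		while ((rq = rq_list_pop(rqlist))) {
			if (my_queue_one(rq))
				rq_list_add_tail(&requeue, rq);
		}
		/* requests left in *rqlist are queued individually by the core */
		*rqlist = requeue;
	}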
|
||||
|
||||
/**
|
||||
* enum blk_eh_timer_return - How the timeout handler should proceed
|
||||
* @BLK_EH_DONE: The block driver completed the command or will complete it at
|
||||
@ -577,7 +575,7 @@ struct blk_mq_ops {
|
||||
* empty the @rqlist completely, then the rest will be queued
|
||||
* individually by the block layer upon return.
|
||||
*/
|
||||
void (*queue_rqs)(struct request **rqlist);
|
||||
void (*queue_rqs)(struct rq_list *rqlist);
|
||||
|
||||
/**
|
||||
* @get_budget: Reserve budget before queue request, once .queue_rq is
|
||||
@ -857,12 +855,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
|
||||
*/
|
||||
static inline bool blk_mq_need_time_stamp(struct request *rq)
|
||||
{
|
||||
/*
|
||||
* passthrough io doesn't use iostat accounting, cgroup stats
|
||||
* and io scheduler functionalities.
|
||||
*/
|
||||
if (blk_rq_is_passthrough(rq))
|
||||
return false;
|
||||
return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
|
||||
}
|
||||
|
||||
@ -892,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
|
||||
else if (iob->complete != complete)
|
||||
return false;
|
||||
iob->need_ts |= blk_mq_need_time_stamp(req);
|
||||
rq_list_add(&iob->req_list, req);
|
||||
rq_list_add_tail(&iob->req_list, req);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -925,6 +917,8 @@ void blk_freeze_queue_start(struct request_queue *q);
|
||||
void blk_mq_freeze_queue_wait(struct request_queue *q);
|
||||
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
|
||||
unsigned long timeout);
|
||||
void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
|
||||
void blk_freeze_queue_start_non_owner(struct request_queue *q);
|
||||
|
||||
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
|
||||
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
|
||||
@ -989,7 +983,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
|
||||
rq->nr_phys_segments = nr_segs;
|
||||
rq->__data_len = bio->bi_iter.bi_size;
|
||||
rq->bio = rq->biotail = bio;
|
||||
rq->ioprio = bio_prio(bio);
|
||||
}
|
||||
|
||||
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/lockdep.h>
|
||||
|
||||
struct module;
|
||||
struct request_queue;
|
||||
@ -194,7 +195,7 @@ struct gendisk {
|
||||
unsigned int nr_zones;
|
||||
unsigned int zone_capacity;
|
||||
unsigned int last_zone_capacity;
|
||||
unsigned long *conv_zones_bitmap;
|
||||
unsigned long __rcu *conv_zones_bitmap;
|
||||
unsigned int zone_wplugs_hash_bits;
|
||||
spinlock_t zone_wplugs_lock;
|
||||
struct mempool_s *zone_wplugs_pool;
|
||||
@ -349,6 +350,9 @@ typedef unsigned int __bitwise blk_flags_t;
|
||||
/* I/O topology is misaligned */
|
||||
#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
|
||||
|
||||
/* passthrough command IO accounting */
|
||||
#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2))
|
||||
|
||||
struct queue_limits {
|
||||
blk_features_t features;
|
||||
blk_flags_t flags;
|
||||
@ -371,6 +375,7 @@ struct queue_limits {
|
||||
unsigned int max_user_discard_sectors;
|
||||
unsigned int max_secure_erase_sectors;
|
||||
unsigned int max_write_zeroes_sectors;
|
||||
unsigned int max_hw_zone_append_sectors;
|
||||
unsigned int max_zone_append_sectors;
|
||||
unsigned int discard_granularity;
|
||||
unsigned int discard_alignment;
|
||||
@ -471,6 +476,11 @@ struct request_queue {
|
||||
struct xarray hctx_table;
|
||||
|
||||
struct percpu_ref q_usage_counter;
|
||||
struct lock_class_key io_lock_cls_key;
|
||||
struct lockdep_map io_lockdep_map;
|
||||
|
||||
struct lock_class_key q_lock_cls_key;
|
||||
struct lockdep_map q_lockdep_map;
|
||||
|
||||
struct request *last_merge;
|
||||
|
||||
@ -566,6 +576,10 @@ struct request_queue {
|
||||
struct throtl_data *td;
|
||||
#endif
|
||||
struct rcu_head rcu_head;
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
struct task_struct *mq_freeze_owner;
|
||||
int mq_freeze_owner_depth;
|
||||
#endif
|
||||
wait_queue_head_t mq_freeze_wq;
|
||||
/*
|
||||
* Protect concurrent access to q_usage_counter by
|
||||
@ -617,6 +631,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
|
||||
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
|
||||
#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
|
||||
#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
|
||||
#define blk_queue_passthrough_stat(q) \
|
||||
((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH)
|
||||
#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
|
||||
#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
|
||||
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
|
||||
@ -725,6 +741,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
|
||||
#define for_each_bio(_bio) \
|
||||
for (; _bio; _bio = _bio->bi_next)
|
||||
|
||||
int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups,
|
||||
struct fwnode_handle *fwnode);
|
||||
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups);
|
||||
static inline int __must_check add_disk(struct gendisk *disk)
|
||||
@ -929,6 +948,7 @@ queue_limits_start_update(struct request_queue *q)
|
||||
int queue_limits_commit_update(struct request_queue *q,
|
||||
struct queue_limits *lim);
|
||||
int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
|
||||
int blk_validate_limits(struct queue_limits *lim);
|
||||
|
||||
/**
|
||||
* queue_limits_cancel_update - cancel an atomic update of queue limits
|
||||
@ -986,6 +1006,11 @@ extern void blk_put_queue(struct request_queue *);
|
||||
|
||||
void blk_mark_disk_dead(struct gendisk *disk);
|
||||
|
||||
struct rq_list {
|
||||
struct request *head;
|
||||
struct request *tail;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
/*
|
||||
* blk_plug permits building a queue of related requests by holding the I/O
|
||||
@ -999,10 +1024,10 @@ void blk_mark_disk_dead(struct gendisk *disk);
|
||||
* blk_flush_plug() is called.
|
||||
*/
|
||||
struct blk_plug {
|
||||
struct request *mq_list; /* blk-mq requests */
|
||||
struct rq_list mq_list; /* blk-mq requests */
|
||||
|
||||
/* if ios_left is > 1, we can batch tag/rq allocations */
|
||||
struct request *cached_rq;
|
||||
struct rq_list cached_rqs;
|
||||
u64 cur_ktime;
|
||||
unsigned short nr_ios;
|
||||
|
||||
@ -1145,6 +1170,11 @@ enum blk_default_limits {
|
||||
*/
|
||||
#define BLK_DEF_MAX_SECTORS_CAP 2560u
|
||||
|
||||
static inline struct queue_limits *bdev_limits(struct block_device *bdev)
|
||||
{
|
||||
return &bdev_get_queue(bdev)->limits;
|
||||
}
|
||||
|
||||
static inline unsigned long queue_segment_boundary(const struct request_queue *q)
|
||||
{
|
||||
return q->limits.seg_boundary_mask;
|
||||
@ -1185,25 +1215,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
return q->limits.max_segment_size;
}

static inline unsigned int
queue_limits_max_zone_append_sectors(const struct queue_limits *l)
{
unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);

return min_not_zero(l->max_zone_append_sectors, max_sectors);
}

static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
{
if (!blk_queue_is_zoned(q))
return 0;

return queue_limits_max_zone_append_sectors(&q->limits);
}

static inline bool queue_emulates_zone_append(struct request_queue *q)
{
return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
}

static inline bool bdev_emulates_zone_append(struct block_device *bdev)
@ -1214,7 +1228,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev)
static inline unsigned int
bdev_max_zone_append_sectors(struct block_device *bdev)
{
return queue_max_zone_append_sectors(bdev_get_queue(bdev));
return bdev_limits(bdev)->max_zone_append_sectors;
}

static inline unsigned int bdev_max_segments(struct block_device *bdev)
@ -1279,23 +1293,23 @@ unsigned int bdev_discard_alignment(struct block_device *bdev);

static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
{
return bdev_get_queue(bdev)->limits.max_discard_sectors;
return bdev_limits(bdev)->max_discard_sectors;
}

static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
{
return bdev_get_queue(bdev)->limits.discard_granularity;
return bdev_limits(bdev)->discard_granularity;
}

static inline unsigned int
bdev_max_secure_erase_sectors(struct block_device *bdev)
{
return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
return bdev_limits(bdev)->max_secure_erase_sectors;
}

static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
{
return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors;
return bdev_limits(bdev)->max_write_zeroes_sectors;
}

static inline bool bdev_nonrot(struct block_device *bdev)
@ -1331,7 +1345,7 @@ static inline bool bdev_write_cache(struct block_device *bdev)

static inline bool bdev_fua(struct block_device *bdev)
{
return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
return bdev_limits(bdev)->features & BLK_FEAT_FUA;
}

static inline bool bdev_nowait(struct block_device *bdev)
@ -1376,6 +1390,33 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
return bdev_offset_from_zone_start(bdev, sector) == 0;
}

/**
* bdev_zone_is_seq - check if a sector belongs to a sequential write zone
* @bdev: block device to check
* @sector: sector number
*
* Check if @sector on @bdev is contained in a sequential write required zone.
*/
static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
{
bool is_seq = false;

#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
if (bdev_is_zoned(bdev)) {
struct gendisk *disk = bdev->bd_disk;
unsigned long *bitmap;

rcu_read_lock();
bitmap = rcu_dereference(disk->conv_zones_bitmap);
is_seq = !bitmap ||
!test_bit(disk_zone_no(disk, sector), bitmap);
rcu_read_unlock();
}
#endif

return is_seq;
}
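bdev_zone_is_seq() above reads the conventional-zone bitmap under RCU: a missing bitmap means every zone is sequential, otherwise a set bit marks a conventional zone. The same decision as a stand-alone sketch with a toy bitmap and zone-size calculation (the kernel uses disk_zone_no() and test_bit() instead):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define ZONE_SECTORS 524288ULL	/* toy zone size: 256 MiB in 512-byte sectors */
#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

/* A set bit marks a conventional zone; no bitmap means all zones are sequential. */
static bool zone_is_seq(const unsigned long *conv_bitmap, unsigned long long sector)
{
	unsigned long long zone = sector / ZONE_SECTORS;

	if (!conv_bitmap)
		return true;
	return !(conv_bitmap[zone / BITS_PER_LONG] & (1UL << (zone % BITS_PER_LONG)));
}

int main(void)
{
	unsigned long conv[1] = { 0x3 };	/* zones 0 and 1 are conventional */

	printf("zone 0: %s\n", zone_is_seq(conv, 0) ? "seq" : "conv");
	printf("zone 2: %s\n", zone_is_seq(conv, 2 * ZONE_SECTORS) ? "seq" : "conv");
	return 0;
}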

static inline int queue_dma_alignment(const struct request_queue *q)
{
return q->limits.dma_alignment;
@ -1648,7 +1689,7 @@ int bdev_thaw(struct block_device *bdev);
void bdev_fput(struct file *bdev_file);

struct io_comp_batch {
struct request *req_list;
struct rq_list req_list;
bool need_ts;
void (*complete)(struct io_comp_batch *);
};

@ -327,7 +327,8 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
__u8 rsvd338[4];
__le16 nvmsetidmax;
__le16 endgidmax;
__u8 anatt;
__u8 anacap;
__le32 anagrpmax;
@ -522,6 +523,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
@ -530,6 +532,7 @@ enum {
NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15,
NVME_ID_CNS_NS_GRANULARITY = 0x16,
NVME_ID_CNS_UUID_LIST = 0x17,
NVME_ID_CNS_ENDGRP_LIST = 0x19,
};

enum {
@ -560,6 +563,8 @@ enum {
NVME_NS_FLBAS_LBA_SHIFT = 1,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_NMIC_SHARED = 1 << 0,
NVME_NS_ROTATIONAL = 1 << 4,
NVME_NS_VWC_NOT_PRESENT = 1 << 5,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@ -617,6 +622,40 @@ enum {
NVME_NIDT_CSI = 0x04,
};

struct nvme_endurance_group_log {
__u8 egcw;
__u8 egfeat;
__u8 rsvd2;
__u8 avsp;
__u8 avspt;
__u8 pused;
__le16 did;
__u8 rsvd8[24];
__u8 ee[16];
__u8 dur[16];
__u8 duw[16];
__u8 muw[16];
__u8 hrc[16];
__u8 hwc[16];
__u8 mdie[16];
__u8 neile[16];
__u8 tegcap[16];
__u8 uegcap[16];
__u8 rsvd192[320];
};

struct nvme_rotational_media_log {
__le16 endgid;
__le16 numa;
__le16 nrs;
__u8 rsvd6[2];
__le32 spinc;
__le32 fspinc;
__le32 ldc;
__le32 fldc;
__u8 rsvd24[488];
};

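struct nvme_rotational_media_log above matches the NVMe 2.1 Rotational Media Information log page, with all multi-byte fields little-endian. A hedged user-space sketch that decodes a raw copy of such a page (toy buffer and hand-rolled byte helpers; the kernel uses le16_to_cpu()/le32_to_cpu() and its own log-page plumbing):

#include <stdint.h>
#include <stdio.h>

/* Byte helpers; in the kernel this is le16_to_cpu()/le32_to_cpu(). */
static uint16_t get_le16(const uint8_t *p)
{
	return (uint16_t)(p[0] | (p[1] << 8));
}

static uint32_t get_le32(const uint8_t *p)
{
	return p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}

int main(void)
{
	uint8_t page[512] = { 0 };

	/* Fake a page: endurance group 1, nominal rotational speed 7200 rpm. */
	page[0] = 1;			/* endgid, offset 0 */
	page[4] = 7200 & 0xff;		/* nrs, offset 4 */
	page[5] = 7200 >> 8;

	/* Offsets follow the struct layout shown above. */
	printf("endgid=%u numa=%u nrs=%u spinc=%u\n",
	       get_le16(&page[0]), get_le16(&page[2]),
	       get_le16(&page[4]), get_le32(&page[8]));
	return 0;
}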
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@ -1244,6 +1283,7 @@ enum {
NVME_FEAT_WRITE_PROTECT = 0x84,
NVME_FEAT_VENDOR_START = 0xC0,
NVME_FEAT_VENDOR_END = 0xFF,
NVME_LOG_SUPPORTED = 0x00,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
@ -1254,6 +1294,8 @@ enum {
NVME_LOG_TELEMETRY_CTRL = 0x08,
NVME_LOG_ENDURANCE_GROUP = 0x09,
NVME_LOG_ANA = 0x0c,
NVME_LOG_FEATURES = 0x12,
NVME_LOG_RMI = 0x16,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@ -1261,6 +1303,24 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};

struct nvme_supported_log {
__le32 lids[256];
};

enum {
NVME_LIDS_LSUPP = 1 << 0,
};

struct nvme_supported_features_log {
__le32 fis[256];
};

enum {
NVME_FIS_FSUPP = 1 << 0,
NVME_FIS_NSCPE = 1 << 20,
NVME_FIS_CSCPE = 1 << 21,
};

/* NVMe Namespace Write Protect State */
enum {
NVME_NS_NO_WRITE_PROTECT = 0,
@ -1281,7 +1341,8 @@ struct nvme_identify {
__u8 cns;
__u8 rsvd3;
__le16 ctrlid;
__u8 rsvd11[3];
__le16 cnssid;
__u8 rsvd11;
__u8 csi;
__u32 rsvd12[4];
};
@ -1389,7 +1450,7 @@ struct nvme_get_log_page_command {
__u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
__u16 rsvd11;
__le16 lsi;
union {
struct {
__le32 lpol;
@ -2037,4 +2098,72 @@ struct nvme_completion {
#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
#define NVME_TERTIARY(ver) ((ver) & 0xff)

enum {
NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00,
};

enum {
NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00,
NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01,
NVME_PR_LOG_RESERVATION_RELEASED = 0x02,
NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03,
};

enum {
NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1,
NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2,
NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3,
};

struct nvme_pr_log {
__le64 count;
__u8 type;
__u8 nr_pages;
__u8 rsvd1[2];
__le32 nsid;
__u8 rsvd2[48];
};

struct nvmet_pr_register_data {
__le64 crkey;
__le64 nrkey;
};

struct nvmet_pr_acquire_data {
__le64 crkey;
__le64 prkey;
};

struct nvmet_pr_release_data {
__le64 crkey;
};

enum nvme_pr_capabilities {
NVME_PR_SUPPORT_PTPL = 1,
NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1,
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2,
NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3,
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4,
NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5,
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6,
NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7,
};

enum nvme_pr_register_action {
NVME_PR_REGISTER_ACT_REG = 0,
NVME_PR_REGISTER_ACT_UNREG = 1,
NVME_PR_REGISTER_ACT_REPLACE = 1 << 1,
};

enum nvme_pr_acquire_action {
NVME_PR_ACQUIRE_ACT_ACQUIRE = 0,
NVME_PR_ACQUIRE_ACT_PREEMPT = 1,
NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1,
};

enum nvme_pr_release_action {
NVME_PR_RELEASE_ACT_RELEASE = 0,
NVME_PR_RELEASE_ACT_CLEAR = 1,
};

#endif /* _LINUX_NVME_H */

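The persistent reservation structures and enums above back the new nvmet reservation support and its tracing. As a hedged, user-space-only illustration of how the register payload and a capabilities bitmask line up (toy types and local bit copies; real hosts issue these through the NVMe command path):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirrors struct nvmet_pr_register_data: two little-endian 64-bit keys. */
struct pr_register_data {
	uint64_t crkey;		/* current reservation key */
	uint64_t nrkey;		/* new reservation key to establish */
};

/* Local copies of a few bits from enum nvme_pr_capabilities above. */
#define PR_SUPPORT_PTPL			(1u << 0)
#define PR_SUPPORT_WRITE_EXCLUSIVE	(1u << 1)

int main(void)
{
	struct pr_register_data data = { .crkey = 0, .nrkey = 0x1234abcdULL };
	unsigned int rescap = PR_SUPPORT_PTPL | PR_SUPPORT_WRITE_EXCLUSIVE;
	uint8_t payload[16];

	/* On a little-endian host the in-memory layout already matches the wire. */
	memcpy(payload, &data, sizeof(payload));

	printf("register payload is %zu bytes\n", sizeof(payload));
	printf("PTPL: %s, write exclusive: %s\n",
	       rescap & PR_SUPPORT_PTPL ? "yes" : "no",
	       rescap & PR_SUPPORT_WRITE_EXCLUSIVE ? "yes" : "no");
	return 0;
}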
@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_GET_GEOMETRY:
case IOC_OPAL_DISCOVERY:
case IOC_OPAL_REVERT_LSP:
case IOC_OPAL_SET_SID_PW:
return true;
}
return false;

@ -99,7 +99,7 @@ TRACE_EVENT(block_rq_requeue,
__entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->ioprio = rq->ioprio;
__entry->ioprio = req_get_ioprio(rq);

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
__entry->error = blk_status_to_errno(error);
__entry->ioprio = rq->ioprio;
__entry->ioprio = req_get_ioprio(rq);

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
@ -209,7 +209,7 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
__entry->ioprio = rq->ioprio;
__entry->ioprio = req_get_ioprio(rq);

blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';

@ -215,5 +215,6 @@ struct opal_revert_lsp {
#define IOC_OPAL_GET_GEOMETRY _IOR('p', 238, struct opal_geometry)
#define IOC_OPAL_DISCOVERY _IOW('p', 239, struct opal_discovery)
#define IOC_OPAL_REVERT_LSP _IOW('p', 240, struct opal_revert_lsp)
#define IOC_OPAL_SET_SID_PW _IOW('p', 241, struct opal_new_pw)

#endif /* _UAPI_SED_OPAL_H */

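IOC_OPAL_SET_SID_PW is the ioctl added in the two sed-opal hunks above. A hedged sketch of reaching it from user space, assuming headers from a kernel that carries this series; the device path is an example and the opal_new_pw session/key fields are left unfilled, since they depend on the rest of linux/sed-opal.h:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/sed-opal.h>

int main(void)
{
	struct opal_new_pw pw;
	int fd = open("/dev/nvme0n1", O_RDWR);	/* example device node */

	if (fd < 0)
		return 1;

	memset(&pw, 0, sizeof(pw));
	/* Fill in the session/key fields per linux/sed-opal.h before real use. */

	if (ioctl(fd, IOC_OPAL_SET_SID_PW, &pw) < 0)
		perror("IOC_OPAL_SET_SID_PW");

	close(fd);
	return 0;
}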
@ -147,8 +147,18 @@
*/
#define UBLK_F_NEED_GET_DATA (1UL << 2)

/*
* - Block devices are recoverable if ublk server exits and restarts
* - Outstanding I/O when ublk server exits is met with errors
* - I/O issued while there is no ublk server queues
*/
#define UBLK_F_USER_RECOVERY (1UL << 3)

/*
* - Block devices are recoverable if ublk server exits and restarts
* - Outstanding I/O when ublk server exits is reissued
* - I/O issued while there is no ublk server queues
*/
#define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4)

/*
@ -190,10 +200,18 @@
*/
#define UBLK_F_ZONED (1ULL << 8)

/*
* - Block devices are recoverable if ublk server exits and restarts
* - Outstanding I/O when ublk server exits is met with errors
* - I/O issued while there is no ublk server is met with errors
*/
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)

/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
#define UBLK_S_DEV_QUIESCED 2
#define UBLK_S_DEV_FAIL_IO 3

/* shipped via sqe->cmd of io_uring command */
struct ublksrv_ctrl_cmd {

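The UBLK_F_USER_RECOVERY* flags above choose what happens to outstanding and newly issued I/O while the ublk server is away. A small hedged sketch that maps a flag combination to its policy (plain bit tests on local copies of the values; the real flags travel in the ublk control-plane commands):

#include <stdio.h>

/* Values as defined in ublk_cmd.h above. */
#define F_USER_RECOVERY			(1UL << 3)
#define F_USER_RECOVERY_REISSUE		(1UL << 4)
#define F_USER_RECOVERY_FAIL_IO		(1UL << 9)

static const char *recovery_policy(unsigned long flags)
{
	if (flags & F_USER_RECOVERY_FAIL_IO)
		return "recoverable, outstanding and new I/O fails";
	if (flags & F_USER_RECOVERY_REISSUE)
		return "recoverable, outstanding I/O reissued, new I/O queues";
	if (flags & F_USER_RECOVERY)
		return "recoverable, outstanding I/O errors, new I/O queues";
	return "not recoverable";
}

int main(void)
{
	printf("%s\n", recovery_policy(F_USER_RECOVERY | F_USER_RECOVERY_FAIL_IO));
	return 0;
}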
@ -1179,12 +1179,12 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
poll_flags |= BLK_POLL_ONESHOT;

/* iopoll may have completed current req */
if (!rq_list_empty(iob.req_list) ||
if (!rq_list_empty(&iob.req_list) ||
READ_ONCE(req->iopoll_completed))
break;
}

if (!rq_list_empty(iob.req_list))
if (!rq_list_empty(&iob.req_list))
iob.complete(&iob);
else if (!pos)
return 0;

@ -1682,8 +1682,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
}

/*
* Extract a list of contiguous pages from an ITER_BVEC iterator. This does
* not get references on the pages, nor does it get a pin on them.
* Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
* This does not get references on the pages, nor does it get a pin on them.
*/
static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
struct page ***pages, size_t maxsize,
@ -1691,35 +1691,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
iov_iter_extraction_t extraction_flags,
size_t *offset0)
{
struct page **p, *page;
size_t skip = i->iov_offset, offset, size;
int k;
size_t skip = i->iov_offset, size = 0;
struct bvec_iter bi;
int k = 0;

for (;;) {
if (i->nr_segs == 0)
return 0;
size = min(maxsize, i->bvec->bv_len - skip);
if (size)
break;
if (i->nr_segs == 0)
return 0;

if (i->iov_offset == i->bvec->bv_len) {
i->iov_offset = 0;
i->nr_segs--;
i->bvec++;
skip = 0;
}
bi.bi_idx = 0;
bi.bi_size = maxsize;
bi.bi_bvec_done = skip;

skip += i->bvec->bv_offset;
page = i->bvec->bv_page + skip / PAGE_SIZE;
offset = skip % PAGE_SIZE;
*offset0 = offset;
maxpages = want_pages_array(pages, maxsize, skip, maxpages);

maxpages = want_pages_array(pages, size, offset, maxpages);
if (!maxpages)
return -ENOMEM;
p = *pages;
for (k = 0; k < maxpages; k++)
p[k] = page + k;
while (bi.bi_size && bi.bi_idx < i->nr_segs) {
struct bio_vec bv = bvec_iter_bvec(i->bvec, bi);

/*
* The iov_iter_extract_pages interface only allows an offset
* into the first page. Break out of the loop if we see an
* offset into subsequent pages, the caller will have to call
* iov_iter_extract_pages again for the remainder.
*/
if (k) {
if (bv.bv_offset)
break;
} else {
*offset0 = bv.bv_offset;
}

(*pages)[k++] = bv.bv_page;
size += bv.bv_len;

if (k >= maxpages)
break;

/*
* We are done when the end of the bvec doesn't align to a page
* boundary as that would create a hole in the returned space.
* The caller will handle this with another call to
* iov_iter_extract_pages.
*/
if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
break;

bvec_iter_advance_single(i->bvec, &bi, bv.bv_len);
}

size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
iov_iter_advance(i, size);
return size;
}

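The rewritten iov_iter_extract_bvec_pages() above walks the bvec array with a bvec_iter and stops either when a segment ends short of a page boundary or when a later segment starts at a nonzero offset, because the returned page array can carry only one leading offset. A stand-alone sketch of that stopping rule with toy segments (no struct page, no maxpages cap):

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/* Toy stand-in for a single-page bio_vec segment. */
struct seg {
	unsigned int offset;	/* offset into the page */
	unsigned int len;	/* bytes used in the page */
};

/*
 * Count how many leading segments can be handed out in one batch:
 * only the first may start at a nonzero offset, and every segment but
 * the last one taken must end exactly on a page boundary.
 */
static size_t extractable_segments(const struct seg *segs, size_t nr,
				   unsigned int *offset0)
{
	size_t k;

	for (k = 0; k < nr; k++) {
		if (k == 0)
			*offset0 = segs[0].offset;
		else if (segs[k].offset)
			break;

		/* Took this segment; a short tail ends the batch after it. */
		if (segs[k].offset + segs[k].len != PAGE_SIZE) {
			k++;
			break;
		}
	}
	return k;
}

int main(void)
{
	struct seg segs[] = {
		{ .offset = 512, .len = PAGE_SIZE - 512 },	/* leading offset is fine */
		{ .offset = 0, .len = PAGE_SIZE },
		{ .offset = 0, .len = 1024 },			/* short tail: last one taken */
		{ .offset = 0, .len = PAGE_SIZE },		/* needs a second call */
	};
	unsigned int offset0 = 0;
	size_t n = extractable_segments(segs, 4, &offset0);

	printf("batched %zu segments, first offset %u\n", n, offset0);
	return 0;
}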