--- a/block/bio.c
+++ b/block/bio.c
@@ -928,8 +928,6 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
return false;
if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
return false;
- if (!zone_device_pages_have_same_pgmap(bv->bv_page, page))
- return false;
*same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) &
PAGE_MASK));
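For reference, the pgmap comparison that this hunk moves out of the merge helper is, roughly, the one defined in include/linux/memremap.h (reproduced from memory as a sketch, not part of this diff): two pages may share a bio_vec only if both are ZONE_DEVICE pages backed by the same dev_pagemap, or neither is.

	static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
			const struct page *b)
	{
		/* A ZONE_DEVICE page never merges with a regular page. */
		if (is_zone_device_page(a) != is_zone_device_page(b))
			return false;
		/* Two regular pages always pass the check. */
		if (!is_zone_device_page(a))
			return true;
		/* Two ZONE_DEVICE pages must come from the same pagemap. */
		return a->pgmap == b->pgmap;
	}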
@@ -993,6 +991,14 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+ /*
+ * When doing ZONE_DEVICE-based P2P transfers, all pages in a
+ * bio must be P2P pages from the same device.
+ */
+ if ((bio->bi_opf & REQ_P2PDMA) &&
+ !zone_device_pages_have_same_pgmap(bv->bv_page, page))
+ return 0;
+
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
same_page)) {
bio->bi_iter.bi_size += len;
@@ -1009,6 +1015,9 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
*/
if (bvec_gap_to_prev(&q->limits, bv, offset))
return 0;
+ } else {
+ if (is_pci_p2pdma_page(page))
+ bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE;
}
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
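The first-page test used in the new else branch above is, approximately, the helper from include/linux/memremap.h (a sketch for context, not part of this diff): a page counts as a P2P page when it is a ZONE_DEVICE page whose pagemap is of the PCI P2PDMA type.

	static inline bool is_pci_p2pdma_page(const struct page *page)
	{
		return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
			is_zone_device_page(page) &&
			page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
	}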
@@ -1133,11 +1142,24 @@ static int bio_add_page_int(struct bio *bio, struct page *page,
if (bio->bi_iter.bi_size > UINT_MAX - len)
return 0;
- if (bio->bi_vcnt > 0 &&
- bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
- page, len, offset, same_page)) {
- bio->bi_iter.bi_size += len;
- return len;
+ if (bio->bi_vcnt > 0) {
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+ /*
+ * When doing ZONE_DEVICE-based P2P transfers, all pages in a
+ * bio must be P2P pages from the same device.
+ */
+ if ((bio->bi_opf & REQ_P2PDMA) &&
+ !zone_device_pages_have_same_pgmap(bv->bv_page, page))
+ return 0;
+
+ if (bvec_try_merge_page(bv, page, len, offset, same_page)) {
+ bio->bi_iter.bi_size += len;
+ return len;
+ }
+ } else {
+ if (is_pci_p2pdma_page(page))
+ bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE;
}
if (bio->bi_vcnt >= bio->bi_max_vecs)
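Returning 0 here rather than an error matters: bio_add_page() callers already treat a short return as "this bio is full", so a range that mixes P2P and host pages simply gets split across separate bios. A hedged caller sketch, under the assumption that the caller retries the rejected page in a fresh bio (bdev, opf, and the retry policy are assumptions, not from this patch):

	for (i = 0; i < nr_pages; i++) {
		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) == PAGE_SIZE)
			continue;
		/* Full bio, or a page with a different pgmap: start a new bio. */
		submit_bio(bio);
		bio = bio_alloc(bdev, nr_pages - i, opf, GFP_KERNEL);
		i--;	/* retry the rejected page in the new bio */
	}
	submit_bio(bio);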
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -568,6 +568,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
const struct queue_limits *lim = &q->limits;
unsigned int nsegs = 0, bytes = 0;
struct bio *bio;
+ int error;
size_t i;
if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
@@ -588,15 +589,30 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
for (i = 0; i < nr_segs; i++) {
struct bio_vec *bv = &bvecs[i];
- /*
- * If the queue doesn't support SG gaps and adding this
- * offset would create a gap, fallback to copy.
- */
- if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
- blk_mq_map_bio_put(bio);
- return -EREMOTEIO;
+ error = -EREMOTEIO;
+ if (bvprvp) {
+ /*
+ * If the queue doesn't support SG gaps and adding this
+ * offset would create a gap, fallback to copy.
+ */
+ if (bvec_gap_to_prev(lim, bvprvp, bv->bv_offset))
+ goto put_bio;
+
+ /*
+ * When doing ZONE_DEVICE-based P2P transfers, all pages
+ * in a bio must be P2P pages, and from the same device.
+ */
+ if ((bio->bi_opf & REQ_P2PDMA) &&
+ !zone_device_pages_have_same_pgmap(bvprvp->bv_page,
+ bv->bv_page))
+ goto put_bio;
+ } else {
+ if (is_pci_p2pdma_page(bv->bv_page))
+ bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE;
}
+
/* check full condition */
+ error = -EINVAL;
if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
goto put_bio;
if (bytes + bv->bv_len > nr_iter)
@@ -611,7 +627,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
return 0;
put_bio:
blk_mq_map_bio_put(bio);
- return -EINVAL;
+ return error;
}
/**
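The reworked error path keeps the distinction the caller depends on: -EREMOTEIO means the bvecs merely violate queue limits (or mix pgmaps) and the data can still be copied through a bounce buffer, while -EINVAL remains a hard failure. Roughly how blk_rq_map_user_iov() consumes this (a sketch from memory, not part of this diff):

	ret = blk_rq_map_user_bvec(rq, iter);
	if (ret != -EREMOTEIO)
		return ret;
	/* fall back to copying the data on limits mismatches */
	copy = true;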
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -378,6 +378,7 @@ enum req_flag_bits {
__REQ_DRV, /* for driver use */
__REQ_FS_PRIVATE, /* for file system (submitter) use */
__REQ_ATOMIC, /* for atomic write operations */
+ __REQ_P2PDMA, /* contains P2P DMA pages */
/*
* Command specific flags, keep last:
*/
@@ -410,6 +411,7 @@ enum req_flag_bits {
#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
+#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA)
#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
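Taken together, the changes establish a bio-wide invariant: once REQ_P2PDMA is set, every page in the bio shares the pgmap of the first one, and REQ_NOMERGE keeps request merging from combining bios with different pgmaps. Expressed as a hypothetical debug check (illustration only, the helper name is made up and not part of this patch):

	static bool bio_p2p_pgmap_invariant(struct bio *bio)
	{
		struct page *first = bio_first_page_all(bio);
		struct bvec_iter iter;
		struct bio_vec bv;

		if (!(bio->bi_opf & REQ_P2PDMA))
			return true;
		bio_for_each_segment(bv, bio, iter)
			if (!zone_device_pages_have_same_pgmap(first, bv.bv_page))
				return false;
		return true;
	}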