@@ -7,6 +7,8 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
+#include <linux/blk-mq-dma.h>
+#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>
@@ -515,6 +517,167 @@ static bool blk_map_iter_next(struct request *req,
#define blk_phys_to_page(_paddr) \
(pfn_to_page(__phys_to_pfn(_paddr)))
+/*
+ * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
+ * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
+ * we need to ensure our segments are aligned to this as well.
+ *
+ * Note that there is no point in using the slightly more complicated IOVA based
+ * path for single segment mappings.
+ */
+static inline bool blk_can_dma_map_iova(struct request *req,
+ struct device *dma_dev)
+{
+ return !((queue_virt_boundary(req->q) + 1) &
+ dma_get_merge_boundary(dma_dev));
+}
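+
+/*
+ * For example (illustration only): with a 4k virt_boundary (mask 0xfff) and a
+ * 4k IOMMU page size (merge boundary mask 0xfff), (0xfff + 1) & 0xfff == 0 and
+ * the IOVA path can be used, while a queue without a virt_boundary (mask 0)
+ * yields 1 & 0xfff != 0 and falls back to per-segment direct mapping.
+ */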
+
+static bool blk_dma_map_bus(struct request *req, struct device *dma_dev,
+ struct blk_dma_iter *iter, struct phys_vec *vec)
+{
+ iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
+ iter->len = vec->len;
+ return true;
+}
+
+static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
+ struct blk_dma_iter *iter, struct phys_vec *vec)
+{
+ iter->addr = dma_map_page(dma_dev, blk_phys_to_page(vec->paddr),
+ offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
+ if (dma_mapping_error(dma_dev, iter->addr)) {
+ iter->status = BLK_STS_RESOURCE;
+ return false;
+ }
+ iter->len = vec->len;
+ return true;
+}
+
+static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, struct blk_dma_iter *iter,
+ struct phys_vec *vec)
+{
+ enum dma_data_direction dir = rq_dma_dir(req);
+ unsigned int mapped = 0;
+ int error = 0;
+
+ iter->addr = state->addr;
+ iter->len = dma_iova_size(state);
+
+ do {
+ error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
+ vec->len, dir, 0);
+ if (error)
+ break;
+ mapped += vec->len;
+ } while (blk_map_iter_next(req, &iter->iter, vec));
+
+	if (!error)
+		error = dma_iova_sync(dma_dev, state, 0, mapped);
+ if (error) {
+ iter->status = errno_to_blk_status(error);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * blk_rq_dma_map_iter_start - map the first DMA segment for a request
+ * @req: request to map
+ * @dma_dev: device to map to
+ * @state: DMA IOVA state
+ * @iter: block layer DMA iterator
+ *
+ * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the
+ * caller and don't need to be initialized. @state needs to be stored for use
+ * at unmap time; @iter is only needed at map time.
+ *
+ * Returns %false if there is no segment to map, including due to an error, or
+ * %true if it did map a segment.
+ *
+ * If a segment was mapped, the DMA address for it is returned in @iter.addr and
+ * the length in @iter.len. If no segment was mapped the status code is
+ * returned in @iter.status.
+ *
+ * The caller can call blk_rq_dma_map_coalesce() to check if further segments
+ * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
+ * to try to map the following segments.
+ */
+bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, struct blk_dma_iter *iter)
+{
+ unsigned int total_len = blk_rq_payload_bytes(req);
+ struct phys_vec vec;
+
+ iter->iter.bio = req->bio;
+ iter->iter.iter = req->bio->bi_iter;
+ memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
+ iter->status = BLK_STS_OK;
+
+ /*
+ * Grab the first segment ASAP because we'll need it to check for P2P
+ * transfers.
+ */
+ if (!blk_map_iter_next(req, &iter->iter, &vec))
+ return false;
+
+ if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) {
+ switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
+ blk_phys_to_page(vec.paddr))) {
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ return blk_dma_map_bus(req, dma_dev, iter, &vec);
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ /*
+ * P2P transfers through the host bridge are treated the
+ * same as non-P2P transfers below and during unmap.
+ */
+ req->cmd_flags &= ~REQ_P2PDMA;
+ break;
+ default:
+ iter->status = BLK_STS_INVAL;
+ return false;
+ }
+ }
+
+ if (blk_can_dma_map_iova(req, dma_dev) &&
+ dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
+ return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
+ return blk_dma_map_direct(req, dma_dev, iter, &vec);
+}
+EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);
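+
+/*
+ * Example (sketch only, hypothetical driver-side code; drv_setup_single() is
+ * an illustrative helper, error handling is elided):
+ *
+ *	struct dma_iova_state state;
+ *	struct blk_dma_iter iter;
+ *
+ *	if (!blk_rq_dma_map_iter_start(req, dma_dev, &state, &iter))
+ *		return iter.status;
+ *
+ *	if (blk_rq_dma_map_coalesce(&state)) {
+ *		// the whole payload is covered by a single DMA range
+ *		drv_setup_single(req, iter.addr, iter.len);
+ *		return BLK_STS_OK;
+ *	}
+ *	// otherwise walk the remaining segments, see the sketch below
+ */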
+
+/**
+ * blk_rq_dma_map_iter_next - map the next DMA segment for a request
+ * @req: request to map
+ * @dma_dev: device to map to
+ * @state: DMA IOVA state
+ * @iter: block layer DMA iterator
+ *
+ * Iterate to the next mapping after a previous call to
+ * blk_rq_dma_map_iter_start(). See there for a detailed description of the
+ * arguments.
+ *
+ * Returns %false if there is no segment to map, including due to an error, or
+ * %true if it did map a segment.
+ *
+ * If a segment was mapped, the DMA address for it is returned in @iter.addr and
+ * the length in @iter.len. If no segment was mapped the status code is
+ * returned in @iter.status.
+ */
+bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, struct blk_dma_iter *iter)
+{
+ struct phys_vec vec;
+
+ if (!blk_map_iter_next(req, &iter->iter, &vec))
+ return false;
+
+ if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
+ return blk_dma_map_bus(req, dma_dev, iter, &vec);
+ return blk_dma_map_direct(req, dma_dev, iter, &vec);
+}
+EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
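+
+/*
+ * Example continued (sketch only; the first segment was already returned by
+ * blk_rq_dma_map_iter_start() above, drv_add_segment() is an illustrative
+ * helper):
+ *
+ *	do {
+ *		drv_add_segment(req, iter.addr, iter.len);
+ *	} while (blk_rq_dma_map_iter_next(req, dma_dev, &state, &iter));
+ *
+ *	if (iter.status != BLK_STS_OK)
+ *		return iter.status;
+ */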
+
static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
struct scatterlist *sglist)
{
new file mode 100644
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BLK_MQ_DMA_H
+#define BLK_MQ_DMA_H
+
+#include <linux/blk-mq.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci-p2pdma.h>
+
+struct blk_dma_iter {
+ /* Output address range for this iteration */
+ dma_addr_t addr;
+ u32 len;
+
+ /* Status code. Only valid when blk_rq_dma_map_iter_* returned false */
+ blk_status_t status;
+
+ /* Internal to blk_rq_dma_map_iter_* */
+ struct req_iterator iter;
+ struct pci_p2pdma_map_state p2pdma;
+};
+
+bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, struct blk_dma_iter *iter);
+bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, struct blk_dma_iter *iter);
+
+/**
+ * blk_rq_dma_map_coalesce - were all segments coalesced?
+ * @state: DMA state to check
+ *
+ * Returns %true if blk_rq_dma_map_iter_start() coalesced all segments into a
+ * single DMA range.
+ */
+static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
+{
+ return dma_use_iova(state);
+}
+
+/**
+ * blk_rq_dma_unmap - try to DMA unmap a request
+ * @req: request to unmap
+ * @dma_dev: device to unmap from
+ * @state: DMA IOVA state
+ *
+ * Returns %false if the caller needs to manually unmap every DMA segment
+ * mapped via the iterator, or %true if no work is left to be done.
+ */
+static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state)
+{
+ if (req->cmd_flags & REQ_P2PDMA)
+ return true;
+
+ if (dma_use_iova(state)) {
+		dma_iova_destroy(dma_dev, state, dma_iova_size(state),
+				rq_dma_dir(req), 0);
+ return true;
+ }
+
+	return !dma_need_unmap(dma_dev);
+}
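+
+/*
+ * Unmap-side sketch (illustration only; segment_addr[]/segment_len[] and
+ * nr_segments stand in for per-segment DMA state a driver saved while
+ * mapping):
+ *
+ *	if (!blk_rq_dma_unmap(req, dma_dev, &state)) {
+ *		for (i = 0; i < nr_segments; i++)
+ *			dma_unmap_page(dma_dev, segment_addr[i],
+ *					segment_len[i], rq_dma_dir(req));
+ *	}
+ */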
+
+#endif /* BLK_MQ_DMA_H */