@@ -181,10 +181,31 @@ struct hmm_range {
const uint64_t *values;
uint64_t default_flags;
uint64_t pfn_flags_mask;
+ uint8_t page_shift;
uint8_t pfn_shift;
bool valid;
};
+/*
+ * hmm_range_page_shift() - return the page shift for the range
+ * @range: range being queried
+ * Returns: page shift (page size = 1 << page shift) for the range
+ */
+static inline unsigned hmm_range_page_shift(const struct hmm_range *range)
+{
+ return range->page_shift;
+}
+
+/*
+ * hmm_range_page_size() - return the page size for the range
+ * @range: range being queried
+ * Returns: page size for the range in bytes
+ */
+static inline unsigned long hmm_range_page_size(const struct hmm_range *range)
+{
+ return 1UL << hmm_range_page_shift(range);
+}
+
/*
* hmm_range_wait_until_valid() - wait for range to be valid
* @range: range affected by invalidation to wait on
@@ -438,7 +459,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
* struct hmm_range range;
* ...
*
- * ret = hmm_range_register(&range, mm, start, end);
+ * ret = hmm_range_register(&range, mm, start, end, page_shift);
* if (ret)
* return ret;
*
@@ -498,7 +519,8 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
int hmm_range_register(struct hmm_range *range,
struct mm_struct *mm,
unsigned long start,
- unsigned long end);
+ unsigned long end,
+ unsigned page_shift);
void hmm_range_unregister(struct hmm_range *range);
long hmm_range_snapshot(struct hmm_range *range);
long hmm_range_fault(struct hmm_range *range, bool block);
@@ -538,7 +560,8 @@ static inline int hmm_vma_fault(struct hmm_range *range, bool block)
range->pfn_flags_mask = -1UL;
ret = hmm_range_register(range, range->vma->vm_mm,
- range->start, range->end);
+ range->start, range->end,
+ PAGE_SHIFT);
if (ret)
return (int)ret;
@@ -396,11 +396,13 @@ static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
uint64_t *pfns = range->pfns;
- unsigned long i;
+ unsigned long i, page_size;
hmm_vma_walk->last = addr;
- i = (addr - range->start) >> PAGE_SHIFT;
- for (; addr < end; addr += PAGE_SIZE, i++) {
+ page_size = 1UL << range->page_shift;
+ i = (addr - range->start) >> range->page_shift;
+
+ for (; addr < end; addr += page_size, i++) {
pfns[i] = range->values[HMM_PFN_NONE];
if (fault || write_fault) {
int ret;
@@ -712,6 +714,69 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return 0;
}
+static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ unsigned long addr = start, i, pfn, mask, size, pfn_inc;
+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ struct hmm_range *range = hmm_vma_walk->range;
+ struct vm_area_struct *vma = walk->vma;
+ struct hstate *h = hstate_vma(vma);
+ uint64_t orig_pfn, cpu_flags;
+ bool fault, write_fault;
+ spinlock_t *ptl;
+ pte_t entry;
+ int ret = 0;
+
+ size = 1UL << huge_page_shift(h);
+ mask = size - 1;
+ if (range->page_shift != PAGE_SHIFT) {
+ /* Make sure we are looking at full page. */
+ if (start & mask)
+ return -EINVAL;
+ if (end < (start + size))
+ return -EINVAL;
+ pfn_inc = size >> PAGE_SHIFT;
+ } else {
+ pfn_inc = 1;
+ size = PAGE_SIZE;
+ }
+
+
+ ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+ entry = huge_ptep_get(pte);
+
+ i = (start - range->start) >> range->page_shift;
+ orig_pfn = range->pfns[i];
+ range->pfns[i] = range->values[HMM_PFN_NONE];
+ cpu_flags = pte_to_hmm_pfn_flags(range, entry);
+ fault = write_fault = false;
+ hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+ &fault, &write_fault);
+ if (fault || write_fault) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ pfn = pte_pfn(entry) + (start & mask);
+ for (; addr < end; addr += size, i++, pfn += pfn_inc)
+ range->pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+ hmm_vma_walk->last = end;
+
+unlock:
+ spin_unlock(ptl);
+
+ if (ret == -ENOENT)
+ return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+
+ return ret;
+#else /* CONFIG_HUGETLB_PAGE */
+ return -EINVAL;
+#endif
+}
+
static void hmm_pfns_clear(struct hmm_range *range,
uint64_t *pfns,
unsigned long addr,
@@ -735,6 +800,7 @@ static void hmm_pfns_special(struct hmm_range *range)
* @mm: the mm struct for the range of virtual address
* @start: start virtual address (inclusive)
* @end: end virtual address (exclusive)
+ * @page_shift: expect page shift for the range
* Returns 0 on success, -EFAULT if the address space is no longer valid
*
* Track updates to the CPU page table see include/linux/hmm.h
@@ -742,15 +808,22 @@ static void hmm_pfns_special(struct hmm_range *range)
int hmm_range_register(struct hmm_range *range,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ unsigned page_shift)
{
- range->start = start & PAGE_MASK;
- range->end = end & PAGE_MASK;
+ unsigned long mask = ((1UL << page_shift) - 1UL);
+
range->valid = false;
range->hmm = NULL;
- if (range->start >= range->end)
+ if ((start & mask) || (end & mask))
return -EINVAL;
+ if (start >= end)
+ return -EINVAL;
+
+ range->page_shift = page_shift;
+ range->start = start;
+ range->end = end;
range->hmm = hmm_register(mm);
if (!range->hmm)
@@ -818,6 +891,7 @@ EXPORT_SYMBOL(hmm_range_unregister);
*/
long hmm_range_snapshot(struct hmm_range *range)
{
+ const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
unsigned long start = range->start, end;
struct hmm_vma_walk hmm_vma_walk;
struct hmm *hmm = range->hmm;
@@ -834,15 +908,26 @@ long hmm_range_snapshot(struct hmm_range *range)
return -EAGAIN;
vma = find_vma(hmm->mm, start);
- if (vma == NULL || (vma->vm_flags & VM_SPECIAL))
+ if (vma == NULL || (vma->vm_flags & device_vma))
return -EFAULT;
- /* FIXME support hugetlb fs/dax */
- if (is_vm_hugetlb_page(vma) || vma_is_dax(vma)) {
+ /* FIXME support dax */
+ if (vma_is_dax(vma)) {
hmm_pfns_special(range);
return -EINVAL;
}
+ if (is_vm_hugetlb_page(vma)) {
+ struct hstate *h = hstate_vma(vma);
+
+ if (huge_page_shift(h) != range->page_shift &&
+ range->page_shift != PAGE_SHIFT)
+ return -EINVAL;
+ } else {
+ if (range->page_shift != PAGE_SHIFT)
+ return -EINVAL;
+ }
+
if (!(vma->vm_flags & VM_READ)) {
/*
* If vma do not allow read access, then assume that it
@@ -868,6 +953,7 @@ long hmm_range_snapshot(struct hmm_range *range)
mm_walk.hugetlb_entry = NULL;
mm_walk.pmd_entry = hmm_vma_walk_pmd;
mm_walk.pte_hole = hmm_vma_walk_hole;
+ mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
walk_page_range(start, end, &mm_walk);
start = end;
@@ -909,6 +995,7 @@ EXPORT_SYMBOL(hmm_range_snapshot);
*/
long hmm_range_fault(struct hmm_range *range, bool block)
{
+ const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
unsigned long start = range->start, end;
struct hmm_vma_walk hmm_vma_walk;
struct hmm *hmm = range->hmm;
@@ -928,15 +1015,26 @@ long hmm_range_fault(struct hmm_range *range, bool block)
}
vma = find_vma(hmm->mm, start);
- if (vma == NULL || (vma->vm_flags & VM_SPECIAL))
+ if (vma == NULL || (vma->vm_flags & device_vma))
return -EFAULT;
- /* FIXME support hugetlb fs/dax */
- if (is_vm_hugetlb_page(vma) || vma_is_dax(vma)) {
+ /* FIXME support dax */
+ if (vma_is_dax(vma)) {
hmm_pfns_special(range);
return -EINVAL;
}
+ if (is_vm_hugetlb_page(vma)) {
+ struct hstate *h = hstate_vma(vma);
+
+ if (huge_page_shift(h) != range->page_shift &&
+ range->page_shift != PAGE_SHIFT)
+ return -EINVAL;
+ } else {
+ if (range->page_shift != PAGE_SHIFT)
+ return -EINVAL;
+ }
+
if (!(vma->vm_flags & VM_READ)) {
/*
* If vma do not allow read access, then assume that it
@@ -963,6 +1061,7 @@ long hmm_range_fault(struct hmm_range *range, bool block)
mm_walk.hugetlb_entry = NULL;
mm_walk.pmd_entry = hmm_vma_walk_pmd;
mm_walk.pte_hole = hmm_vma_walk_hole;
+ mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
do {
ret = walk_page_range(start, end, &mm_walk);
@@ -1003,14 +1102,15 @@ long hmm_range_dma_map(struct hmm_range *range,
dma_addr_t *daddrs,
bool block)
{
- unsigned long i, npages, mapped;
+ unsigned long i, npages, mapped, page_size;
long ret;
ret = hmm_range_fault(range, block);
if (ret <= 0)
return ret ? ret : -EBUSY;
- npages = (range->end - range->start) >> PAGE_SHIFT;
+ page_size = hmm_range_page_size(range);
+ npages = (range->end - range->start) >> range->page_shift;
for (i = 0, mapped = 0; i < npages; ++i) {
enum dma_data_direction dir = DMA_FROM_DEVICE;
struct page *page;
@@ -1039,7 +1139,7 @@ long hmm_range_dma_map(struct hmm_range *range,
if (range->pfns[i] & range->values[HMM_PFN_WRITE])
dir = DMA_BIDIRECTIONAL;
- daddrs[i] = dma_map_page(device, page, 0, PAGE_SIZE, dir);
+ daddrs[i] = dma_map_page(device, page, 0, page_size, dir);
if (dma_mapping_error(device, daddrs[i])) {
ret = -EFAULT;
goto unmap;
@@ -1066,7 +1166,7 @@ long hmm_range_dma_map(struct hmm_range *range,
if (range->pfns[i] & range->values[HMM_PFN_WRITE])
dir = DMA_BIDIRECTIONAL;
- dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
+ dma_unmap_page(device, daddrs[i], page_size, dir);
mapped--;
}
@@ -1094,7 +1194,7 @@ long hmm_range_dma_unmap(struct hmm_range *range,
dma_addr_t *daddrs,
bool dirty)
{
- unsigned long i, npages;
+ unsigned long i, npages, page_size;
long cpages = 0;
/* Sanity check. */
@@ -1105,7 +1205,8 @@ long hmm_range_dma_unmap(struct hmm_range *range,
if (!range->pfns)
return -EINVAL;
- npages = (range->end - range->start) >> PAGE_SHIFT;
+ page_size = hmm_range_page_size(range);
+ npages = (range->end - range->start) >> range->page_shift;
for (i = 0; i < npages; ++i) {
enum dma_data_direction dir = DMA_FROM_DEVICE;
struct page *page;
@@ -1127,7 +1228,7 @@ long hmm_range_dma_unmap(struct hmm_range *range,
}
/* Unmap and clear pfns/dma address */
- dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
+ dma_unmap_page(device, daddrs[i], page_size, dir);
range->pfns[i] = range->values[HMM_PFN_NONE];
/* FIXME see comments in hmm_vma_dma_map() */
daddrs[i] = 0;