@@ -558,6 +558,7 @@ struct trace_buffer {
struct ring_buffer_meta *meta;
unsigned long phys_start;
+ unsigned long virt_start;
unsigned int subbuf_size;
unsigned int subbuf_order;
@@ -2455,6 +2456,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
start = addr;
end = start + size;
+ buffer->virt_start = start;
/* scratch_size needs to be aligned too */
scratch_size = ALIGN(scratch_size, sizeof(long));
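/*
 * Illustrative note, not part of this patch: recording the vmap()'d
 * start address alongside phys_start lets later code turn any buffer
 * virtual address back into a page frame with plain offset arithmetic,
 * assuming the range is contiguous in both address spaces:
 *
 *	pfn = (buffer->phys_start + (vaddr - buffer->virt_start)) >> PAGE_SHIFT;
 *
 * which is exactly what the rb_struct_page() helper added below does.
 */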
@@ -6058,6 +6060,80 @@ static void rb_clear_buffer_page(struct buffer_page *page)
page->read = 0;
}
+/*
+ * Get the struct page for the given buffer page.
+ *
+ * For normal ring buffer pages that are allocated via the page allocator,
+ * the struct page can simply be retrieved via virt_to_page().
+ *
+ * But if the buffer was created via a physical mapping and vmap()
+ * was used to get to the virtual addresses, use the stored virtual
+ * and physical start addresses to calculate the original physical
+ * address of the given page and use pfn_to_page() to return
+ * the struct page.
+ */
+static struct page *rb_struct_page(struct trace_buffer *buffer, void *vaddr)
+{
+ if (buffer->flags & RB_FL_PHYSICAL) {
+ unsigned long addr = (unsigned long)vaddr;
+
+ addr -= buffer->virt_start;
+ addr += buffer->phys_start;
+ return pfn_to_page(addr >> PAGE_SHIFT);
+ }
+ return virt_to_page(vaddr);
+}
+
+/* Some archs do not have data cache coherency between kernel and user-space */
+static void rb_flush_buffer_page(struct trace_buffer *buffer,
+ struct buffer_page *bpage)
+{
+ struct page *page = rb_struct_page(buffer, bpage->page);
+
+ flush_dcache_folio(page_folio(page));
+}
+
+/* The user-mapped meta page is always allocated via alloc_page() */
+static void rb_flush_meta(void *meta)
+{
+ struct page *page = virt_to_page(meta);
+
+ flush_dcache_folio(page_folio(page));
+}
+
+/*
+ * When the buffer is memory mapped to user space, each sub buffer
+ * has a unique id that is used by the meta data to tell the user
+ * where the current reader page is.
+ *
+ * For a normally allocated ring buffer, the id is saved in the buffer page
+ * id field and updated via this function.
+ *
+ * But for a physical memory mapped buffer, the id is already assigned for
+ * memory ordering in the physical memory layout and cannot be used. Instead,
+ * the index of where the page lies in the memory layout is used.
+ *
+ * For the normal pages, set the buffer page id with the passed in @id
+ * value and return that.
+ *
+ * For memory mapped pages, get the page index in the physical memory layout
+ * and return that as the id.
+ */
+static int rb_page_id(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *bpage, int id)
+{
+ /*
+ * For boot buffers, the id is the page's index in the memory layout;
+ * otherwise, store the passed in id in the buffer page.
+ */
+ if (cpu_buffer->ring_meta)
+ id = rb_meta_subbuf_idx(cpu_buffer->ring_meta, bpage->page);
+ else
+ bpage->id = id;
+
+ return id;
+}
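/*
 * Illustrative example, not part of this patch: for a normal buffer,
 * rb_page_id(cpu_buffer, bpage, 3) stores 3 in bpage->id and returns 3.
 * For a boot-mapped (physical) buffer the passed in 3 is ignored and the
 * page's fixed index in the memory layout is returned instead, e.g. 0 if
 * bpage->page happens to be the first sub-buffer of the mapped region.
 */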
+
static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{
struct trace_buffer_meta *meta = cpu_buffer->meta_page;
@@ -6066,15 +6142,16 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
return;
meta->reader.read = cpu_buffer->reader_page->read;
- meta->reader.id = cpu_buffer->reader_page->id;
+ meta->reader.id = rb_page_id(cpu_buffer, cpu_buffer->reader_page,
+ cpu_buffer->reader_page->id);
+
meta->reader.lost_events = cpu_buffer->lost_events;
meta->entries = local_read(&cpu_buffer->entries);
meta->overrun = local_read(&cpu_buffer->overrun);
meta->read = cpu_buffer->read;
- /* Some archs do not have data cache coherency between kernel and user-space */
- flush_dcache_folio(virt_to_folio(cpu_buffer->meta_page));
+ rb_flush_meta(meta);
}
static void
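/*
 * Illustrative user-space sketch, not part of this patch. Assuming the
 * mapping layout described in Documentation/trace/ring-buffer-map.rst,
 * where the sub-buffers follow the meta page in id order, a reader can
 * locate the current reader sub-buffer from the updated meta page as:
 *
 *	struct trace_buffer_meta *meta = base;
 *	void *reader = base + meta->meta_page_size +
 *		       meta->reader.id * meta->subbuf_size;
 *
 * With this change, reader.id for a physically mapped buffer is the
 * sub-buffer's index in the memory layout rather than bpage->id.
 */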
@@ -6982,23 +7059,29 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
struct trace_buffer_meta *meta = cpu_buffer->meta_page;
unsigned int nr_subbufs = cpu_buffer->nr_pages + 1;
struct buffer_page *first_subbuf, *subbuf;
+ int cnt = 0;
int id = 0;
- subbuf_ids[id] = (unsigned long)cpu_buffer->reader_page->page;
- cpu_buffer->reader_page->id = id++;
+ id = rb_page_id(cpu_buffer, cpu_buffer->reader_page, id);
+ subbuf_ids[id++] = (unsigned long)cpu_buffer->reader_page->page;
+ cnt++;
first_subbuf = subbuf = rb_set_head_page(cpu_buffer);
do {
+ id = rb_page_id(cpu_buffer, subbuf, id);
+
if (WARN_ON(id >= nr_subbufs))
break;
subbuf_ids[id] = (unsigned long)subbuf->page;
- subbuf->id = id;
rb_inc_page(&subbuf);
id++;
+ cnt++;
} while (subbuf != first_subbuf);
+ WARN_ON(cnt != nr_subbufs);
+
/* install subbuf ID to kern VA translation */
cpu_buffer->subbuf_ids = subbuf_ids;
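/*
 * Illustrative example, not part of this patch: subbuf_ids[] maps each
 * user visible id to the kernel VA of its sub-buffer. For a boot-mapped
 * buffer the ids are the pages' fixed indexes in the memory layout, so
 * the loop above fills the table out of order as it walks the page list.
 * For example, with nr_subbufs == 4 and the reader page at index 2:
 *
 *	subbuf_ids[2] = reader_page->page, then the head page walk fills
 *	subbuf_ids[0], subbuf_ids[1] and subbuf_ids[3], giving cnt == 4.
 *
 * The WARN_ON(cnt != nr_subbufs) catches a walk that visited too few or
 * too many pages.
 */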
@@ -7134,6 +7217,7 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
if (!pgoff) {
unsigned long meta_page_padding;
+ /* The meta page is always allocated via alloc_page() */
pages[p++] = virt_to_page(cpu_buffer->meta_page);
/*
@@ -7163,7 +7247,8 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
goto out;
}
- page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
+ page = rb_struct_page(cpu_buffer->buffer,
+ (void *)cpu_buffer->subbuf_ids[s]);
for (; off < (1 << (subbuf_order)); off++, page++) {
if (p >= nr_pages)
@@ -7196,14 +7281,6 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
unsigned long flags, *subbuf_ids;
int err = 0;
- /*
- * Currently, this does not support vmap()'d buffers.
- * Return -ENODEV as that is what is returned when a file
- * does not support memory mapping.
- */
- if (buffer->flags & RB_FL_PHYSICAL)
- return -ENODEV;
-
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -EINVAL;
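/*
 * Illustrative user-space sketch, not part of this patch (the instance
 * name below is hypothetical): with the -ENODEV check above removed, a
 * persistent (physically mapped) buffer is mmapped the same way as a
 * normal one:
 *
 *	fd = open("/sys/kernel/tracing/instances/boot_mapped/per_cpu/cpu0/trace_pipe_raw",
 *		  O_RDONLY | O_NONBLOCK);
 *	meta = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 *
 * followed by the usual reads of the meta page and sub-buffers.
 */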
@@ -7384,8 +7461,7 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu)
goto consume;
out:
- /* Some archs do not have data cache coherency between kernel and user-space */
- flush_dcache_folio(virt_to_folio(cpu_buffer->reader_page->page));
+ rb_flush_buffer_page(buffer, cpu_buffer->reader_page);
rb_update_meta_page(cpu_buffer);