@@ -0,0 +1,164 @@
+#if !defined(_TRACE_MM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MM_H
+
+#include <linux/tracepoint.h>
+#include <linux/page-flags.h>
+#include <linux/memcontrol.h>
+#include <linux/pagemap.h>
+#include <linux/mm.h>
+#include <linux/kernel-page-flags.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mm
+
+extern struct trace_print_flags pageflag_names[];
+
+/**
+ * dump_page_frame - called by the trace page dump trigger
+ * @pfn: page frame number
+ * @page: pointer to the page frame
+ *
+ * This is a helper tracepoint for dumping page frames.
+ * It records various information about a page frame.
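+ *
+ * Triggered by writing a pfn range to the debugfs file
+ * tracing/objects/mm/pages/dump-pfn (see kernel/trace/trace_mm.c).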
+ */
+TRACE_EVENT(dump_page_frame,
+
+ TP_PROTO(unsigned long pfn, struct page *page),
+
+ TP_ARGS(pfn, page),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, pfn )
+ __field( struct page *, page )
+ __field( u64, stable_flags )
+ __field( unsigned long, flags )
+ __field( unsigned int, count )
+ __field( unsigned int, mapcount )
+ __field( unsigned long, private )
+ __field( unsigned long, mapping )
+ __field( unsigned long, index )
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->page = page;
+ __entry->stable_flags = stable_page_flags(page);
+ __entry->flags = page->flags;
+ __entry->count = atomic_read(&page->_count);
+ __entry->mapcount = page_mapcount(page);
+ __entry->private = page->private;
+ __entry->mapping = (unsigned long)page->mapping;
+ __entry->index = page->index;
+ ),
+
+ TP_printk("%12lx %16p %8x %8x %16lx %16lx %16lx %s",
+ __entry->pfn,
+ __entry->page,
+ __entry->count,
+ __entry->mapcount,
+ __entry->private,
+ __entry->mapping,
+ __entry->index,
+ ftrace_print_flags_seq(p, "|",
+ __entry->flags & PAGE_FLAGS_MASK,
+ pageflag_names)
+ )
+);
+
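+/*
+ * dump_page_cache - one event per run of similar, consecutive pages in a
+ * file's page cache.  Flag letters in the output: M:mmap m:mlocked
+ * u:unevictable A:active R:referenced U:uptodate D:dirty W:writeback
+ * I:reclaim d:mappedtodisk P:private.
+ */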
+TRACE_EVENT(dump_page_cache,
+
+ TP_PROTO(struct page *page, unsigned long len),
+
+ TP_ARGS(page, len),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, index )
+ __field( unsigned long, len )
+ __field( u64, flags )
+ __field( unsigned int, count )
+ __field( unsigned int, mapcount )
+ ),
+
+ TP_fast_assign(
+ __entry->index = page->index;
+ __entry->len = len;
+ __entry->flags = stable_page_flags(page);
+ __entry->count = atomic_read(&page->_count);
+ __entry->mapcount = page_mapcount(page);
+ ),
+
+ TP_printk("%12lu %6lu %c%c%c%c%c%c%c%c%c%c%c %4u %4u",
+ __entry->index,
+ __entry->len,
+ __entry->flags & (1ULL << KPF_MMAP) ? 'M' : '_',
+ __entry->flags & (1ULL << KPF_MLOCKED) ? 'm' : '_',
+ __entry->flags & (1ULL << KPF_UNEVICTABLE) ? 'u' : '_',
+ __entry->flags & (1ULL << KPF_ACTIVE) ? 'A' : '_',
+ __entry->flags & (1ULL << KPF_REFERENCED) ? 'R' : '_',
+ __entry->flags & (1ULL << KPF_UPTODATE) ? 'U' : '_',
+ __entry->flags & (1ULL << KPF_DIRTY) ? 'D' : '_',
+ __entry->flags & (1ULL << KPF_WRITEBACK) ? 'W' : '_',
+ __entry->flags & (1ULL << KPF_RECLAIM) ? 'I' : '_',
+ __entry->flags & (1ULL << KPF_MAPPEDTODISK) ? 'd' : '_',
+ __entry->flags & (1ULL << KPF_PRIVATE) ? 'P' : '_',
+ __entry->count,
+ __entry->mapcount)
+);
+
+
+#define show_inode_type(val) __print_symbolic(val, \
+ { S_IFREG, "REG" }, \
+ { S_IFDIR, "DIR" }, \
+ { S_IFLNK, "LNK" }, \
+ { S_IFBLK, "BLK" }, \
+ { S_IFCHR, "CHR" }, \
+ { S_IFIFO, "FIFO" }, \
+ { S_IFSOCK, "SOCK" })
+
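+/*
+ * dump_inode_cache - one event per dumped inode.  State letters:
+ * D:dirty-pages d:dirty-datasync m:dirty-sync S:under-sync.  The comm
+ * column is the task that first brought the inode into the cache.
+ */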
+TRACE_EVENT(dump_inode_cache,
+
+ TP_PROTO(struct inode *inode, char *name, int len),
+
+ TP_ARGS(inode, name, len),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, ino )
+ __field( loff_t, size ) /* bytes */
+ __field( loff_t, cached ) /* bytes */
+ __field( unsigned long, age ) /* ms */
+ __field( unsigned long, state )
+ __field( umode_t, mode )
+ __array( char, comm, TASK_COMM_LEN)
+ __dynamic_array(char, file, len )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->size = i_size_read(inode);
+ __entry->cached = inode->i_mapping->nrpages;
+ __entry->cached <<= PAGE_CACHE_SHIFT;
+ __entry->age = (jiffies - inode->dirtied_when) * 1000 / HZ;
+ __entry->state = inode->i_state;
+ __entry->mode = inode->i_mode;
+ memcpy(__entry->comm, inode->i_comm, TASK_COMM_LEN);
+ memcpy(__get_str(file), name, len);
+ ),
+
+ TP_printk("%12lu %12llu %12llu %12lu %c%c%c%c %4s %16s %s",
+ __entry->ino,
+ __entry->size,
+ __entry->cached,
+ __entry->age,
+ __entry->state & I_DIRTY_PAGES ? 'D' : '_',
+ __entry->state & I_DIRTY_DATASYNC ? 'd' : '_',
+ __entry->state & I_DIRTY_SYNC ? 'm' : '_',
+ __entry->state & I_SYNC ? 'S' : '_',
+ show_inode_type(__entry->mode & S_IFMT),
+ __entry->comm,
+ __get_str(file))
+);
+
+#endif /* _TRACE_MM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
@@ -26,6 +26,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer
obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_objects.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
@@ -53,6 +54,7 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
+obj-$(CONFIG_EVENT_TRACING) += trace_mm.o
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
@@ -0,0 +1,367 @@
+/*
+ * Trace mm pages
+ *
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * Code based on Matt Mackall's /proc/[kpagecount|kpageflags] code.
+ */
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/ctype.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+#include "trace_output.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/mm.h>
+
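+/*
+ * Call @trace for every valid page frame in [@start, @end), clamping the
+ * range to max_pfn and skipping invalid pfns.
+ */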
+void trace_mm_page_frames(unsigned long start, unsigned long end,
+ void (*trace)(unsigned long pfn, struct page *page))
+{
+ unsigned long pfn = start;
+ struct page *page;
+
+	if (start >= max_pfn)
+ return;
+
+ if (end > max_pfn)
+ end = max_pfn;
+
+ while (pfn < end) {
+ page = NULL;
+ if (pfn_valid(pfn))
+ page = pfn_to_page(pfn);
+		if (page)
+			trace(pfn, page);
+		pfn++;
+ }
+}
+
+static void trace_mm_page_frame(unsigned long pfn, struct page *page)
+{
+ trace_dump_page_frame(pfn, page);
+}
+
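+/* Reads of the trigger files just return "0\n"; writes do the real work. */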
+static ssize_t
+trace_mm_pfn_range_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ return simple_read_from_buffer(ubuf, cnt, ppos, "0\n", 2);
+}
+
+
+/*
+ * recognized formats:
+ * "M N" start=M, end=N
+ * "M" start=M, end=M+1
+ * "M +N" start=M, end=M+N-1
+ */
+static ssize_t
+trace_mm_pfn_range_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ unsigned long start;
+ unsigned long end = 0;
+ char buf[64];
+ char *ptr;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ if (tracing_update_buffers() < 0)
+ return -ENOMEM;
+
+ if (trace_set_clr_event("mm", "dump_page_frame", 1))
+ return -EINVAL;
+
+ buf[cnt] = 0;
+
+ start = simple_strtoul(buf, &ptr, 0);
+
+ for (; *ptr; ptr++) {
+ if (isdigit(*ptr)) {
+ if (*(ptr - 1) == '+')
+ end = start;
+ end += simple_strtoul(ptr, NULL, 0);
+ break;
+ }
+ }
+ if (!*ptr)
+ end = start + 1;
+
+ trace_mm_page_frames(start, end, trace_mm_page_frame);
+
+ return cnt;
+}
+
+static const struct file_operations trace_mm_fops = {
+ .open = tracing_open_generic,
+ .read = trace_mm_pfn_range_read,
+ .write = trace_mm_pfn_range_write,
+};
+
+static struct dentry *trace_objects_mm_dir(void)
+{
+ static struct dentry *d_mm;
+ struct dentry *d_objects;
+
+ if (d_mm)
+ return d_mm;
+
+ d_objects = trace_objects_dir();
+ if (!d_objects)
+ return NULL;
+
+ d_mm = debugfs_create_dir("mm", d_objects);
+ if (!d_mm)
+ pr_warning("Could not create 'objects/mm' directory\n");
+
+ return d_mm;
+}
+
+static unsigned long page_flags(struct page *page)
+{
+	return page->flags & PAGE_FLAGS_MASK;
+}
+
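+/* Two pages are "similar" when their flags, refcounts and mapcounts match. */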
+static int pages_similar(struct page *page0, struct page *page)
+{
+ if (page_flags(page0) != page_flags(page))
+ return 0;
+
+ if (page_count(page0) != page_count(page))
+ return 0;
+
+ if (page_mapcount(page0) != page_mapcount(page))
+ return 0;
+
+ return 1;
+}
+
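+/*
+ * Walk the page cache radix tree of @mapping and emit one dump_page_cache
+ * event per run of pages with consecutive indices and similar state, so a
+ * large cached file produces a compact trace instead of one event per page.
+ */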
+static void dump_pagecache(struct address_space *mapping)
+{
+ unsigned long nr_pages;
+ struct page *pages[PAGEVEC_SIZE];
+ struct page *uninitialized_var(page0);
+ struct page *page;
+ unsigned long start = 0;
+ unsigned long len = 0;
+ int i;
+
+ for (;;) {
+ rcu_read_lock();
+ nr_pages = radix_tree_gang_lookup(&mapping->page_tree,
+ (void **)pages, start + len, PAGEVEC_SIZE);
+ rcu_read_unlock();
+
+ if (nr_pages == 0) {
+ if (len)
+ trace_dump_page_cache(page0, len);
+ return;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ page = pages[i];
+
+ if (len &&
+ page->index == start + len &&
+ pages_similar(page0, page))
+ len++;
+ else {
+ if (len)
+ trace_dump_page_cache(page0, len);
+ page0 = page;
+ start = page->index;
+ len = 1;
+ }
+ }
+ cond_resched();
+ }
+}
+
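+/*
+ * Emit a dump_inode_cache event for @inode.  With @mnt == NULL the caller
+ * passes the file name directly in @name_buf; otherwise the path is
+ * resolved into @name_buf (one page) with d_path().
+ */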
+static void dump_inode_cache(struct inode *inode,
+ char *name_buf,
+ struct vfsmount *mnt)
+{
+ struct path path = {
+ .mnt = mnt,
+ .dentry = d_find_alias(inode)
+ };
+ char *name;
+ int len;
+
+ if (!mnt) {
+ trace_dump_inode_cache(inode, name_buf, strlen(name_buf));
+ return;
+ }
+
+ if (!path.dentry) {
+ trace_dump_inode_cache(inode, "", 1);
+ return;
+ }
+
+ name = d_path(&path, name_buf, PAGE_SIZE);
+ if (IS_ERR(name)) {
+ name = "";
+ len = 1;
+ } else
+ len = PAGE_SIZE + name_buf - name;
+
+ trace_dump_inode_cache(inode, name, len);
+
+ if (path.dentry)
+ dput(path.dentry);
+}
+
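+/*
+ * Dump every in-core inode of @sb together with its page cache.
+ * inode_lock is dropped while each inode is dumped; the reference taken
+ * with __iget() keeps the current inode valid as a list cursor, and the
+ * iput() of the previous inode is deferred so it never runs under the lock.
+ */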
+static void dump_fs_pagecache(struct super_block *sb, struct vfsmount *mnt)
+{
+ struct inode *inode;
+ struct inode *prev_inode = NULL;
+ char *name_buf;
+
+ name_buf = (char *)__get_free_page(GFP_TEMPORARY);
+ if (!name_buf)
+ return;
+
+ down_read(&sb->s_umount);
+ if (!sb->s_root)
+ goto out;
+
+ spin_lock(&inode_lock);
+ list_for_each_entry_reverse(inode, &sb->s_inodes, i_sb_list) {
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+ continue;
+ __iget(inode);
+ spin_unlock(&inode_lock);
+ dump_inode_cache(inode, name_buf, mnt);
+ if (inode->i_mapping->nrpages)
+ dump_pagecache(inode->i_mapping);
+ iput(prev_inode);
+ prev_inode = inode;
+ cond_resched();
+ spin_lock(&inode_lock);
+ }
+ spin_unlock(&inode_lock);
+ iput(prev_inode);
+out:
+ up_read(&sb->s_umount);
+ free_page((unsigned long)name_buf);
+}
+
+static ssize_t
+trace_pagecache_write(struct file *filp, const char __user *ubuf, size_t count,
+ loff_t *ppos)
+{
+ struct file *file = NULL;
+ char *name;
+ int err = 0;
+
+ if (count <= 1)
+ return -EINVAL;
+ if (count >= PAGE_SIZE)
+ return -ENAMETOOLONG;
+
+ name = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ if (copy_from_user(name, ubuf, count)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* strip the newline added by `echo` */
+ if (name[count-1] == '\n')
+ name[count-1] = '\0';
+ else
+ name[count] = '\0';
+
+ file = filp_open(name, O_RDONLY|O_LARGEFILE, 0);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ file = NULL;
+ goto out;
+ }
+
+ if (tracing_update_buffers() < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ if (trace_set_clr_event("mm", "dump_page_cache", 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+ if (trace_set_clr_event("mm", "dump_inode_cache", 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
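+	/*
+	 * The "dump-fs" file is created with i_private set: dump the whole
+	 * filesystem that the named file lives on.  "dump-file" dumps only
+	 * that file's inode and page cache.
+	 */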
+ if (filp->f_path.dentry->d_inode->i_private) {
+ dump_fs_pagecache(file->f_path.dentry->d_sb, file->f_path.mnt);
+ } else {
+ dump_inode_cache(file->f_mapping->host, name, NULL);
+ dump_pagecache(file->f_mapping);
+ }
+
+out:
+ if (file)
+ fput(file);
+ kfree(name);
+
+ return err ? err : count;
+}
+
+static const struct file_operations trace_pagecache_fops = {
+ .open = tracing_open_generic,
+ .read = trace_mm_pfn_range_read,
+ .write = trace_pagecache_write,
+};
+
+static struct dentry *trace_objects_mm_pages_dir(void)
+{
+ static struct dentry *d_pages;
+ struct dentry *d_mm;
+
+ if (d_pages)
+ return d_pages;
+
+ d_mm = trace_objects_mm_dir();
+ if (!d_mm)
+ return NULL;
+
+ d_pages = debugfs_create_dir("pages", d_mm);
+ if (!d_pages)
+ pr_warning("Could not create debugfs "
+ "'objects/mm/pages' directory\n");
+
+ return d_pages;
+}
+
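+/*
+ * Create the trigger files under tracing/objects/mm/pages/: dump-pfn
+ * takes a pfn range, dump-file and dump-fs take a file name (dump-fs
+ * dumps the whole filesystem containing that file).
+ */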
+static __init int trace_objects_mm_init(void)
+{
+ struct dentry *d_pages;
+
+ d_pages = trace_objects_mm_pages_dir();
+ if (!d_pages)
+ return 0;
+
+ trace_create_file("dump-pfn", 0600, d_pages, NULL,
+ &trace_mm_fops);
+
+ trace_create_file("dump-file", 0600, d_pages, NULL,
+ &trace_pagecache_fops);
+
+ trace_create_file("dump-fs", 0600, d_pages, (void *)1,
+ &trace_pagecache_fops);
+
+ return 0;
+}
+fs_initcall(trace_objects_mm_init);
@@ -295,6 +295,7 @@ struct dentry *trace_create_file(const c
const struct file_operations *fops);
struct dentry *tracing_init_dentry(void);
+struct dentry *trace_objects_dir(void);
struct ring_buffer_event;
@@ -0,0 +1,26 @@
+#include <linux/debugfs.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
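+/*
+ * Return the "objects" directory under the tracing debugfs directory,
+ * creating it on first use.  Object dump triggers live below it.
+ */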
+struct dentry *trace_objects_dir(void)
+{
+ static struct dentry *d_objects;
+ struct dentry *d_tracer;
+
+ if (d_objects)
+ return d_objects;
+
+ d_tracer = tracing_init_dentry();
+ if (!d_tracer)
+ return NULL;
+
+ d_objects = debugfs_create_dir("objects", d_tracer);
+ if (!d_objects)
+ pr_warning("Could not create debugfs "
+ "'objects' directory\n");
+
+ return d_objects;
+}
+
+
@@ -5493,7 +5493,7 @@ bool is_free_buddy_page(struct page *pag
}
#endif
-static struct trace_print_flags pageflag_names[] = {
+struct trace_print_flags pageflag_names[] = {
{1UL << PG_locked, "locked" },
{1UL << PG_error, "error" },
{1UL << PG_referenced, "referenced" },
@@ -5541,7 +5541,7 @@ static void dump_page_flags(unsigned lon
printk(KERN_ALERT "page flags: %#lx(", flags);
/* remove zone id */
- flags &= (1UL << NR_PAGEFLAGS) - 1;
+ flags &= PAGE_FLAGS_MASK;
for (i = 0; pageflag_names[i].name && flags; i++) {
@@ -414,6 +414,7 @@ static inline void __ClearPageTail(struc
* there has been a kernel bug or struct page corruption.
*/
#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1)
+#define PAGE_FLAGS_MASK ((1 << NR_PAGEFLAGS) - 1)
#define PAGE_FLAGS_PRIVATE \
(1 << PG_private | 1 << PG_private_2)
@@ -182,7 +182,13 @@ int inode_init_always(struct super_block
inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_rdev = 0;
- inode->dirtied_when = 0;
+
+ /*
+	 * This records the inode load time.  It is invalidated once the inode
+	 * is dirtied or when jiffies wraps around.  Despite these pitfalls it
+	 * still provides useful information for use cases such as fastboot.
+ */
+ inode->dirtied_when = jiffies;
if (security_inode_alloc(inode))
goto out;
@@ -226,6 +232,9 @@ int inode_init_always(struct super_block
percpu_counter_inc(&nr_inodes);
+ BUILD_BUG_ON(sizeof(inode->i_comm) != TASK_COMM_LEN);
+ memcpy(inode->i_comm, current->comm, TASK_COMM_LEN);
+
return 0;
out:
return -ENOMEM;
@@ -800,6 +800,8 @@ struct inode {
struct posix_acl *i_default_acl;
#endif
void *i_private; /* fs or device private pointer */
+
+	char			i_comm[16];	/* task that first cached it */
};
static inline int inode_unhashed(struct inode *inode)