Message ID | 20200707144540.21216-11-sjpark@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | DAMON: Support Physical Memory Address Space Monitoring | expand |
>-----Original Message----- >From: owner-linux-mm@kvack.org <owner-linux-mm@kvack.org> On Behalf >Of SeongJae Park >Sent: Tuesday, July 7, 2020 10:46 PM >To: akpm@linux-foundation.org >Cc: SeongJae Park <sjpark@amazon.de>; Jonathan.Cameron@Huawei.com; >aarcange@redhat.com; acme@kernel.org; alexander.shishkin@linux.intel.com; >amit@kernel.org; benh@kernel.crashing.org; brendan.d.gregg@gmail.com; >brendanhiggins@google.com; cai@lca.pw; colin.king@canonical.com; >corbet@lwn.net; david@redhat.com; dwmw@amazon.com; >foersleo@amazon.de; irogers@google.com; jolsa@redhat.com; >kirill@shutemov.name; mark.rutland@arm.com; mgorman@suse.de; >minchan@kernel.org; mingo@redhat.com; namhyung@kernel.org; >peterz@infradead.org; rdunlap@infradead.org; riel@surriel.com; >rientjes@google.com; rostedt@goodmis.org; rppt@kernel.org; >sblbir@amazon.com; shakeelb@google.com; shuah@kernel.org; >sj38.park@gmail.com; snu@amazon.de; vbabka@suse.cz; >vdavydov.dev@gmail.com; yang.shi@linux.alibaba.com; Huang, Ying ><ying.huang@intel.com>; linux-damon@amazon.com; linux-mm@kvack.org; >linux-doc@vger.kernel.org; linux-kernel@vger.kernel.org >Subject: [RFC v5 10/11] tools/damon/record: Support NUMA specific >recording > >From: SeongJae Park <sjpark@amazon.de> > >This commit updates the DAMON user space tool (damo-record) for NUMA >specific physical memory monitoring. With this change, users can >monitor accesses to physical memory of specific NUMA node. > >Signed-off-by: SeongJae Park <sjpark@amazon.de> >--- > tools/damon/_paddr_layout.py | 158 >+++++++++++++++++++++++++++++++++++ > tools/damon/record.py | 21 ++++- > 2 files changed, 178 insertions(+), 1 deletion(-) > create mode 100644 tools/damon/_paddr_layout.py > >diff --git a/tools/damon/_paddr_layout.py b/tools/damon/_paddr_layout.py >new file mode 100644 >index 000000000000..10056172db21 >--- /dev/null >+++ b/tools/damon/_paddr_layout.py >@@ -0,0 +1,158 @@ >+#!/usr/bin/env python3 >+# SPDX-License-Identifier: GPL-2.0 >+ >+import os >+ >+class PaddrRange: >+ start = None >+ end = None >+ nid = None >+ state = None >+ name = None >+ >+ def __init__(self, start, end, nid, state, name): >+ self.start = start >+ self.end = end >+ self.nid = nid >+ self.state = state >+ self.name = name >+ >+ def interleaved(self, prange): >+ if self.end <= prange.start: >+ return None >+ if prange.end <= self.start: >+ return None >+ return [max(self.start, prange.start), min(self.end, prange.end)] >+ >+ def __str__(self): >+ return '%x-%x, nid %s, state %s, name %s' % (self.start, self.end, >+ self.nid, self.state, self.name) >+ >+class MemBlock: >+ nid = None >+ index = None >+ state = None >+ >+ def __init__(self, nid, index, state): >+ self.nid = nid >+ self.index = index >+ self.state = state >+ >+ def __str__(self): >+ return '%d (%s)' % (self.index, self.state) >+ >+ def __repr__(self): >+ return self.__str__() >+ >+def readfile(file_path): >+ with open(file_path, 'r') as f: >+ return f.read() >+ >+def collapse_ranges(ranges): >+ ranges = sorted(ranges, key=lambda x: x.start) >+ merged = [] >+ for r in ranges: >+ if not merged: >+ merged.append(r) >+ continue >+ last = merged[-1] >+ if last.end != r.start or last.nid != r.nid or last.state != r.state: >+ merged.append(r) >+ else: >+ last.end = r.end >+ return merged >+ >+def memblocks_to_ranges(blocks, block_size): >+ ranges = [] >+ for b in blocks: >+ ranges.append(PaddrRange(b.index * block_size, >+ (b.index + 1) * block_size, b.nid, b.state, None)) >+ >+ return collapse_ranges(ranges) >+ >+def memblock_ranges(): >+ SYSFS='/sys/devices/system/node' >+ sz_block = int(readfile('/sys/devices/system/memory/block_size_bytes'), >16) >+ sys_nodes = [x for x in os.listdir(SYSFS) if x.startswith('node')] >+ >+ blocks = [] >+ for sys_node in sys_nodes: >+ nid = int(sys_node[4:]) >+ >+ sys_node_files = os.listdir(os.path.join(SYSFS, sys_node)) >+ for f in sys_node_files: >+ if not f.startswith('memory'): >+ continue >+ index = int(f[6:]) >+ sys_state = os.path.join(SYSFS, sys_node, f, 'state') >+ state = readfile(sys_state).strip() >+ >+ blocks.append(MemBlock(nid, index, state)) >+ >+ return memblocks_to_ranges(blocks, sz_block) >+ >+def iomem_ranges(): >+ ranges = [] >+ >+ with open('/proc/iomem', 'r') as f: >+ # example of the line: '100000000-42b201fff : System RAM' >+ for line in f: >+ fields = line.split(':') >+ if len(fields) < 2: >+ continue >+ name = ':'.join(fields[1:]).strip() >+ addrs = fields[0].split('-') >+ if len(addrs) != 2: >+ continue >+ start = int(addrs[0], 16) >+ end = int(addrs[1], 16) + 1 >+ ranges.append(PaddrRange(start, end, None, None, name)) >+ >+ return ranges Hi SeongJae Here on system with persistent memory, user can plug {all or portion of }persistent memory into buddy system with drivers from patchset[1] implemented. The persistent memory in this setup will be treated as normal system RAM, and have valid full-fledged page structure as well. For example, here is what /proc/iomem looks like in system with such configuration on my testbed. 1840000000-963fffffff : Persistent Memory 1840000000-18be1fffff : namespace0.0 18c0000000-37bfffffff : dax0.0 18c0000000-37bfffffff : System RAM <- first 128G of persistent memory corresponding to node 2 # numactl -H available: 3 nodes (0-2) node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 node 0 size: 95447 MB node 0 free: 92391 MB node 1 cpus: 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 node 1 size: 96733 MB node 1 free: 95670 MB node 2 cpus: node 2 size: 126976 MB node 2 free: 126935 MB node distances: node 0 1 2 0: 10 21 17 1: 21 10 28 2: 17 28 10 So here we have two ranges: 1840000000-963fffffff : Persistent Memory 18c0000000-37bfffffff : System RAM [1]: https://patchwork.kernel.org/cover/10829019/ >+def paddr_ranges(): >+ ranges1 = memblock_ranges() >+ ranges2 = iomem_ranges() >+ merged = [] >+ >+ for r in ranges1: >+ subsets = [] >+ for r2 in ranges2: >+ interleaved = r.interleaved(r2) >+ if interleaved == None: >+ continue >+ >+ start, end = interleaved >+ left = None >+ if start > r.start: >+ left = PaddrRange(r.start, start, r.nid, r.state, r.name) >+ subsets.append(left) >+ >+ middle = PaddrRange(start, end, r.nid, r.state, r.name) >+ if r2.nid: >+ middle.nid = r2.nid >+ if r2.state: >+ middle.state = r2.state >+ if r2.name: >+ middle.name = r2.name Memory block from numa node 2 will match with range "1840000000-963fffffff : Persistent Memory" But take the name "Persistent memory", expected "System RAM" here. >+ subsets.append(middle) >+ r.start = end >+ if r.start < r.end: >+ subsets = [r] >+ >+ merged += subsets >+ return merged >+ >+def pr_ranges(ranges): >+ print('#%12s %13s\tnode\tstate\tresource\tsize' % ('start', 'end')) >+ for r in ranges: >+ print('%13d %13d\t%s\t%s\t%s\t%d' % (r.start, r.end, r.nid, >+ r.state, r.name, r.end - r.start)) >+ >+def main(): >+ ranges = paddr_ranges() >+ >+ pr_ranges(ranges) >+ >+if __name__ == '__main__': >+ main() >diff --git a/tools/damon/record.py b/tools/damon/record.py >index 416dca940c1d..8440a9818810 100644 >--- a/tools/damon/record.py >+++ b/tools/damon/record.py >@@ -12,6 +12,7 @@ import subprocess > import time > > import _damon >+import _paddr_layout > > def do_record(target, is_target_cmd, init_regions, attrs, old_attrs): > if os.path.isfile(attrs.rfile_path): >@@ -70,6 +71,8 @@ def set_argparser(parser): > help='the target command or the pid to record') > parser.add_argument('-l', '--rbuf', metavar='<len>', type=int, > default=1024*1024, help='length of record result buffer') >+ parser.add_argument('--numa_node', metavar='<node id>', type=int, >+ help='if target is \'paddr\', limit it to the numa node') > parser.add_argument('-o', '--out', metavar='<file path>', type=str, > default='damon.data', help='output file path') > >@@ -96,6 +99,18 @@ def default_paddr_region(): > ret = [start, end] > return ret > >+def paddr_region_of(numa_node): >+ regions = [] >+ default_region = default_paddr_region() >+ paddr_ranges = _paddr_layout.paddr_ranges() >+ for r in paddr_ranges: >+ if r.end <= default_region[0] or default_region[1] <= r.start: >+ continue >+ if r.nid == numa_node and r.name == 'System RAM': >+ regions.append([r.start, r.end]) To profile physical address for numa node 2, above checking will not return valid node 2 range i.e., 18c0000000-37bfffffff : System RAM, but empty ones. I tweaked the checking to match "Persistent Memory" also, it looks physical address monitoring with numa node specified works from first glance. I'm willing to give it a try once you posted next updated version. >+ return regions >+ > def main(args=None): > global orig_attrs > if not args: >@@ -113,12 +128,16 @@ def main(args=None): > args.schemes = '' > new_attrs = _damon.cmd_args_to_attrs(args) > init_regions = _damon.cmd_args_to_init_regions(args) >+ numa_node = args.numa_node > target = args.target > > target_fields = target.split() > if target == 'paddr': # physical memory address space > if not init_regions: >- init_regions = [default_paddr_region()] >+ if numa_node: >+ init_regions = paddr_region_of(numa_node) >+ else: >+ init_regions = [default_paddr_region()] > do_record(target, False, init_regions, new_attrs, orig_attrs) > elif not subprocess.call('which %s > /dev/null' % target_fields[0], > shell=True, executable='/bin/bash'): >-- >2.17.1 >
diff --git a/tools/damon/_paddr_layout.py b/tools/damon/_paddr_layout.py new file mode 100644 index 000000000000..10056172db21 --- /dev/null +++ b/tools/damon/_paddr_layout.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import os + +class PaddrRange: + start = None + end = None + nid = None + state = None + name = None + + def __init__(self, start, end, nid, state, name): + self.start = start + self.end = end + self.nid = nid + self.state = state + self.name = name + + def interleaved(self, prange): + if self.end <= prange.start: + return None + if prange.end <= self.start: + return None + return [max(self.start, prange.start), min(self.end, prange.end)] + + def __str__(self): + return '%x-%x, nid %s, state %s, name %s' % (self.start, self.end, + self.nid, self.state, self.name) + +class MemBlock: + nid = None + index = None + state = None + + def __init__(self, nid, index, state): + self.nid = nid + self.index = index + self.state = state + + def __str__(self): + return '%d (%s)' % (self.index, self.state) + + def __repr__(self): + return self.__str__() + +def readfile(file_path): + with open(file_path, 'r') as f: + return f.read() + +def collapse_ranges(ranges): + ranges = sorted(ranges, key=lambda x: x.start) + merged = [] + for r in ranges: + if not merged: + merged.append(r) + continue + last = merged[-1] + if last.end != r.start or last.nid != r.nid or last.state != r.state: + merged.append(r) + else: + last.end = r.end + return merged + +def memblocks_to_ranges(blocks, block_size): + ranges = [] + for b in blocks: + ranges.append(PaddrRange(b.index * block_size, + (b.index + 1) * block_size, b.nid, b.state, None)) + + return collapse_ranges(ranges) + +def memblock_ranges(): + SYSFS='/sys/devices/system/node' + sz_block = int(readfile('/sys/devices/system/memory/block_size_bytes'), 16) + sys_nodes = [x for x in os.listdir(SYSFS) if x.startswith('node')] + + blocks = [] + for sys_node in sys_nodes: + nid = int(sys_node[4:]) + + sys_node_files = os.listdir(os.path.join(SYSFS, sys_node)) + for f in sys_node_files: + if not f.startswith('memory'): + continue + index = int(f[6:]) + sys_state = os.path.join(SYSFS, sys_node, f, 'state') + state = readfile(sys_state).strip() + + blocks.append(MemBlock(nid, index, state)) + + return memblocks_to_ranges(blocks, sz_block) + +def iomem_ranges(): + ranges = [] + + with open('/proc/iomem', 'r') as f: + # example of the line: '100000000-42b201fff : System RAM' + for line in f: + fields = line.split(':') + if len(fields) < 2: + continue + name = ':'.join(fields[1:]).strip() + addrs = fields[0].split('-') + if len(addrs) != 2: + continue + start = int(addrs[0], 16) + end = int(addrs[1], 16) + 1 + ranges.append(PaddrRange(start, end, None, None, name)) + + return ranges + +def paddr_ranges(): + ranges1 = memblock_ranges() + ranges2 = iomem_ranges() + merged = [] + + for r in ranges1: + subsets = [] + for r2 in ranges2: + interleaved = r.interleaved(r2) + if interleaved == None: + continue + + start, end = interleaved + left = None + if start > r.start: + left = PaddrRange(r.start, start, r.nid, r.state, r.name) + subsets.append(left) + + middle = PaddrRange(start, end, r.nid, r.state, r.name) + if r2.nid: + middle.nid = r2.nid + if r2.state: + middle.state = r2.state + if r2.name: + middle.name = r2.name + subsets.append(middle) + r.start = end + if r.start < r.end: + subsets = [r] + + merged += subsets + return merged + +def pr_ranges(ranges): + print('#%12s %13s\tnode\tstate\tresource\tsize' % ('start', 'end')) + for r in ranges: + print('%13d %13d\t%s\t%s\t%s\t%d' % (r.start, r.end, r.nid, + r.state, r.name, r.end - r.start)) + +def main(): + ranges = paddr_ranges() + + pr_ranges(ranges) + +if __name__ == '__main__': + main() diff --git a/tools/damon/record.py b/tools/damon/record.py index 416dca940c1d..8440a9818810 100644 --- a/tools/damon/record.py +++ b/tools/damon/record.py @@ -12,6 +12,7 @@ import subprocess import time import _damon +import _paddr_layout def do_record(target, is_target_cmd, init_regions, attrs, old_attrs): if os.path.isfile(attrs.rfile_path): @@ -70,6 +71,8 @@ def set_argparser(parser): help='the target command or the pid to record') parser.add_argument('-l', '--rbuf', metavar='<len>', type=int, default=1024*1024, help='length of record result buffer') + parser.add_argument('--numa_node', metavar='<node id>', type=int, + help='if target is \'paddr\', limit it to the numa node') parser.add_argument('-o', '--out', metavar='<file path>', type=str, default='damon.data', help='output file path') @@ -96,6 +99,18 @@ def default_paddr_region(): ret = [start, end] return ret +def paddr_region_of(numa_node): + regions = [] + default_region = default_paddr_region() + paddr_ranges = _paddr_layout.paddr_ranges() + for r in paddr_ranges: + if r.end <= default_region[0] or default_region[1] <= r.start: + continue + if r.nid == numa_node and r.name == 'System RAM': + regions.append([r.start, r.end]) + + return regions + def main(args=None): global orig_attrs if not args: @@ -113,12 +128,16 @@ def main(args=None): args.schemes = '' new_attrs = _damon.cmd_args_to_attrs(args) init_regions = _damon.cmd_args_to_init_regions(args) + numa_node = args.numa_node target = args.target target_fields = target.split() if target == 'paddr': # physical memory address space if not init_regions: - init_regions = [default_paddr_region()] + if numa_node: + init_regions = paddr_region_of(numa_node) + else: + init_regions = [default_paddr_region()] do_record(target, False, init_regions, new_attrs, orig_attrs) elif not subprocess.call('which %s > /dev/null' % target_fields[0], shell=True, executable='/bin/bash'):