Message ID | 20230607014353.3172466-1-irogers@google.com (mailing list archive) |
---|---|
Headers | show |
Series | Reference count checking for thread | expand |
On Tue, Jun 6, 2023 at 6:44 PM Ian Rogers <irogers@google.com> wrote: > > Add reference count checking to thread after first refactoring bits of > the code, such as making the thread red-black tree non-invasive (so > the thread it references is easier to reference count, rather than > having 3 potential references). Part of this refactoring also removes > the dead thread list because if we held a reference here the threads > would never die and anything else has questionable > correctness. addr_location is made into its own C/header file to > capture the init, exit and copy code. > > Fix additional outstanding memory leak and reference count issues to > the point that "perf test" compiled with address sanitizer but without > libtraceevent passes all but one test - libtraceevent reports leaks > within its own code, most likely as it isn't compiled with > sanitizers. The remaining failing test is "68: Test dwarf unwind" and > that has address sanitizer issues as it uses memcpy to access the > stack within the process - we likely want to skip parts of the test > with sanitizers enabled. So I tried a bit harder to break things and ran into crashes/leaks around callchains fixed by the patch below - I used 'perf top -g' and I'll merge the patch into a v2 for the series. The largest remaining leak is caused by map/maps retaining dsos retaining symbols. The map and maps are being retained by the TLS call chain cursors [1] allocated in callchain_cursor_append. For perf report there are similar retention issues via "struct hists", but as this maybe merged with evsels it is messy to cleanup. Thanks, Ian ``` diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 437325d19ad3..909f62b3b266 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -590,6 +590,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms = cursor_node->ms; call->ms.map = map__get(call->ms.map); + call->ms.maps = maps__get(call->ms.maps); call->srcline = cursor_node->srcline; if (cursor_node->branch) { @@ -649,6 +650,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del_init(&call->list); map__zput(call->ms.map); + maps__zput(call->ms.maps); free(call); } free(new); @@ -1010,10 +1012,16 @@ merge_chain_branch(struct callchain_cursor *cursor, int err = 0; list_for_each_entry_safe(list, next_list, &src->val, list) { - callchain_cursor_append(cursor, list->ip, &list->ms, - false, NULL, 0, 0, 0, list->srcline); + struct map_symbol ms = { + .maps = maps__get(list->ms.maps), + .map = map__get(list->ms.map), + }; + callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline); list_del_init(&list->list); + map__zput(ms.map); + maps__zput(ms.maps); map__zput(list->ms.map); + maps__zput(list->ms.maps); free(list); } @@ -1068,8 +1076,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, maps__zput(node->ms.maps); map__zput(node->ms.map); node->ms = *ms; - node->ms.maps = maps__get(node->ms.maps); - node->ms.map = map__get(node->ms.map); + node->ms.maps = maps__get(ms->maps); + node->ms.map = map__get(ms->map); node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; @@ -1463,12 +1471,14 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del_init(&list->list); map__zput(list->ms.map); + maps__zput(list->ms.maps); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del_init(&list->list); map__zput(list->ms.map); + maps__zput(list->ms.maps); free(list); } @@ -1554,6 +1564,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) list_for_each_entry_safe(chain, new, &head, list) { list_del_init(&chain->list); map__zput(chain->ms.map); + maps__zput(chain->ms.maps); free(chain); } return -ENOMEM; @@ -1599,8 +1610,10 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) cursor->nr = 0; cursor->last = &cursor->first; - for (node = cursor->first; node != NULL; node = node->next) + for (node = cursor->first; node != NULL; node = node->next) { map__zput(node->ms.map); + maps__zput(node->ms.maps); + } } void callchain_param_setup(u64 sample_type, const char *arch) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 16dc49876e87..bdad4b8bf77d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2321,7 +2321,7 @@ static int add_callchain_ip(struct thread *thread, struct iterations *iter, u64 branch_from) { - struct map_symbol ms; + struct map_symbol ms = {}; struct addr_location al; int nr_loop_iter = 0, err = 0; u64 iter_cycles = 0; @@ -3091,6 +3091,7 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol * ms struct dso *dso; u64 addr; int ret = 1; + struct map_symbol ilist_ms; if (!symbol_conf.inline_name || !map || !sym) return ret; @@ -3107,18 +3108,20 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms inlines__tree_insert(&dso->inlined_nodes, inline_node); } + ilist_ms = (struct map_symbol) { + .maps = maps__get(ms->maps), + .map = map__get(map), + }; list_for_each_entry(ilist, &inline_node->val, list) { - struct map_symbol ilist_ms = { - .maps = ms->maps, - .map = map, - .sym = ilist->symbol, - }; + ilist_ms.sym = ilist->symbol; ret = callchain_cursor_append(cursor, ip, &ilist_ms, false, NULL, 0, 0, 0, ilist->srcline); if (ret != 0) return ret; } + map__put(ilist_ms.map); + maps__put(ilist_ms.maps); return ret; } ``` [1] http://lkml.kernel.org/r/1338443007-24857-1-git-send-email-namhyung.kim@lge.com > Ian Rogers (20): > perf thread: Remove notion of dead threads > perf thread: Make threads rbtree non-invasive > perf thread: Add accessor functions for thread > perf maps: Make delete static, always use put > perf addr_location: Move to its own header > perf addr_location: Add init/exit/copy functions > perf thread: Add reference count checking > perf machine: Make delete_threads part of machine__exit > perf report: Avoid thread leak > perf header: Ensure bitmaps are freed > perf stat: Avoid evlist leak > perf intel-pt: Fix missed put and leak > perf evlist: Free stats in all evlist destruction > perf python: Avoid 2 leak sanitizer issues > perf jit: Fix two thread leaks > perf symbol-elf: Correct holding a reference > perf maps: Fix overlapping memory leak > perf machine: Fix leak of kernel dso > perf machine: Don't leak module maps > perf map/maps/thread: Changes to reference counting > > tools/perf/arch/arm/tests/dwarf-unwind.c | 2 +- > tools/perf/arch/arm64/tests/dwarf-unwind.c | 2 +- > tools/perf/arch/powerpc/tests/dwarf-unwind.c | 2 +- > tools/perf/arch/x86/tests/dwarf-unwind.c | 2 +- > tools/perf/builtin-annotate.c | 28 +- > tools/perf/builtin-c2c.c | 18 +- > tools/perf/builtin-diff.c | 16 +- > tools/perf/builtin-inject.c | 4 +- > tools/perf/builtin-kmem.c | 13 +- > tools/perf/builtin-kwork.c | 15 +- > tools/perf/builtin-mem.c | 4 +- > tools/perf/builtin-report.c | 21 +- > tools/perf/builtin-sched.c | 80 +++-- > tools/perf/builtin-script.c | 97 +++--- > tools/perf/builtin-stat.c | 1 + > tools/perf/builtin-timechart.c | 11 +- > tools/perf/builtin-top.c | 8 +- > tools/perf/builtin-trace.c | 38 ++- > .../scripts/python/Perf-Trace-Util/Context.c | 4 +- > tools/perf/tests/code-reading.c | 6 +- > tools/perf/tests/dwarf-unwind.c | 1 - > tools/perf/tests/hists_common.c | 2 +- > tools/perf/tests/hists_cumulate.c | 18 +- > tools/perf/tests/hists_filter.c | 11 +- > tools/perf/tests/hists_link.c | 20 +- > tools/perf/tests/hists_output.c | 12 +- > tools/perf/tests/maps.c | 2 +- > tools/perf/tests/mmap-thread-lookup.c | 5 +- > tools/perf/tests/perf-targz-src-pkg | 5 +- > tools/perf/tests/symbols.c | 1 - > tools/perf/tests/thread-maps-share.c | 13 +- > tools/perf/trace/beauty/pid.c | 4 +- > tools/perf/ui/browsers/hists.c | 19 +- > tools/perf/ui/hist.c | 5 +- > tools/perf/ui/stdio/hist.c | 2 +- > tools/perf/util/Build | 1 + > tools/perf/util/addr_location.c | 44 +++ > tools/perf/util/addr_location.h | 31 ++ > tools/perf/util/arm-spe.c | 4 +- > tools/perf/util/build-id.c | 2 + > tools/perf/util/callchain.c | 7 +- > tools/perf/util/cs-etm.c | 28 +- > tools/perf/util/data-convert-json.c | 16 +- > tools/perf/util/db-export.c | 20 +- > tools/perf/util/dlfilter.c | 17 +- > tools/perf/util/event.c | 37 +-- > tools/perf/util/evlist.c | 2 + > tools/perf/util/evsel_fprintf.c | 8 +- > tools/perf/util/header.c | 12 +- > tools/perf/util/hist.c | 22 +- > tools/perf/util/intel-bts.c | 2 +- > tools/perf/util/intel-pt.c | 88 +++--- > tools/perf/util/jitdump.c | 12 +- > tools/perf/util/machine.c | 277 +++++++++--------- > tools/perf/util/map.c | 2 +- > tools/perf/util/maps.c | 5 +- > tools/perf/util/maps.h | 9 +- > tools/perf/util/python.c | 4 + > .../scripting-engines/trace-event-python.c | 28 +- > tools/perf/util/session.c | 8 +- > tools/perf/util/sort.c | 12 +- > tools/perf/util/symbol-elf.c | 4 +- > tools/perf/util/symbol.h | 17 +- > tools/perf/util/thread-stack.c | 25 +- > tools/perf/util/thread.c | 218 +++++++------- > tools/perf/util/thread.h | 210 +++++++++++-- > tools/perf/util/unwind-libdw.c | 27 +- > tools/perf/util/unwind-libunwind-local.c | 19 +- > tools/perf/util/unwind-libunwind.c | 2 +- > tools/perf/util/vdso.c | 2 +- > 70 files changed, 1059 insertions(+), 655 deletions(-) > create mode 100644 tools/perf/util/addr_location.c > create mode 100644 tools/perf/util/addr_location.h > > -- > 2.41.0.rc0.172.g3f132b7071-goog >