@@ -1326,7 +1326,7 @@ static void btf_encoder__delete_saved_funcs(struct btf_encoder *encoder)
}
}
-int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto)
+static int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto)
{
struct btf_encoder_func_state **saved_fns, *s;
struct btf_encoder *e = NULL;
@@ -2134,7 +2134,6 @@ int btf_encoder__encode(struct btf_encoder *encoder, struct conf_load *conf)
int err;
size_t shndx;
- /* for single-threaded case, saved funcs are added here */
btf_encoder__add_saved_funcs(conf->skip_encoding_btf_inconsistent_proto);
for (shndx = 1; shndx < encoder->seccnt; shndx++)
@@ -33,6 +33,4 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu, struct co
struct btf *btf_encoder__btf(struct btf_encoder *encoder);
int btf_encoder__add_encoder(struct btf_encoder *encoder, struct btf_encoder *other);
-int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto);
-
#endif /* _BTF_ENCODER_H_ */
@@ -730,7 +730,7 @@ static int cus__load_btf(struct cus *cus, struct conf_load *conf, const char *fi
* The app stole this cu, possibly deleting it,
* so forget about it
*/
- if (conf && conf->steal && conf->steal(cu, conf, NULL))
+ if (conf && conf->steal && conf->steal(cu, conf))
return 0;
cus__add(cus, cu);
@@ -728,7 +728,7 @@ int ctf__load_file(struct cus *cus, struct conf_load *conf,
* The app stole this cu, possibly deleting it,
* so forget about it
*/
- if (conf && conf->steal && conf->steal(cu, conf, NULL))
+ if (conf && conf->steal && conf->steal(cu, conf))
return 0;
cus__add(cus, cu);
@@ -3250,24 +3250,20 @@ static void cu__sort_types_by_offset(struct cu *cu, struct conf_load *conf)
cu__for_all_tags(cu, type__sort_by_offset, conf);
}
-static int cu__finalize(struct cu *cu, struct cus *cus, struct conf_load *conf, void *thr_data)
+static void cu__finalize(struct cu *cu, struct cus *cus, struct conf_load *conf)
{
cu__for_all_tags(cu, class_member__cache_byte_size, conf);
if (cu__language_reorders_offsets(cu))
cu__sort_types_by_offset(cu, conf);
-
- cus__set_cu_state(cus, cu, CU__LOADED);
-
- if (conf && conf->steal) {
- return conf->steal(cu, conf, thr_data);
- }
- return LSK__KEEPIT;
}
-static int cus__finalize(struct cus *cus, struct cu *cu, struct conf_load *conf, void *thr_data)
+static int cus__steal_now(struct cus *cus, struct cu *cu, struct conf_load *conf)
{
- int lsk = cu__finalize(cu, cus, conf, thr_data);
+ if (!conf || !conf->steal)
+ return 0;
+
+ int lsk = conf->steal(cu, conf);
switch (lsk) {
case LSK__DELETE:
cus__remove(cus, cu);
@@ -3443,11 +3439,110 @@ struct dwarf_cus {
uint32_t nr_cus_created;
};
-struct dwarf_thread {
- struct dwarf_cus *dcus;
- void *data;
+/* Multithreading is implemented using a job/worker model.
+ * cus_processing_queue represents a collection of jobs to be
+ * completed by workers.
+ * dwarf_loader__worker_thread is the worker loop, taking jobs from
+ * the queue, executing them and re-enqueuing new jobs as necessary.
+ * Implementation of this queue ensures two constraints:
+ * * JOB_STEAL jobs for a CU are executed in the order of cu->id, as
+ * a consequence JOB_STEAL jobs always run one at a time.
+ * * Initial number of JOB_DECODE jobs in the queue is effectively a
+ * limit on how many decoded CUs can be held in memory.
+ * See dwarf_loader__decoded_cus_limit()
+ */
+static struct {
+ pthread_mutex_t mutex;
+ pthread_cond_t job_added;
+ /* next_cu_id determines the next CU ready to be stealed
+ * This enforces the order of CU stealing.
+ */
+ uint32_t next_cu_id;
+ struct list_head jobs;
+} cus_processing_queue;
+
+enum job_type {
+ JOB_NONE = 0,
+ JOB_DECODE = 1,
+ JOB_STEAL = 2,
+};
+
+struct cu_processing_job {
+ struct list_head node;
+ enum job_type type;
+ struct cu *cu; /* for JOB_STEAL */
};
+static void cus_queue__init(void)
+{
+ pthread_mutex_init(&cus_processing_queue.mutex, NULL);
+ pthread_cond_init(&cus_processing_queue.job_added, NULL);
+ INIT_LIST_HEAD(&cus_processing_queue.jobs);
+ cus_processing_queue.next_cu_id = 0;
+}
+
+static void cus_queue__destroy(void)
+{
+ pthread_mutex_destroy(&cus_processing_queue.mutex);
+ pthread_cond_destroy(&cus_processing_queue.job_added);
+}
+
+static inline void cus_queue__inc_next_cu_id(void)
+{
+ pthread_mutex_lock(&cus_processing_queue.mutex);
+ cus_processing_queue.next_cu_id++;
+ pthread_mutex_unlock(&cus_processing_queue.mutex);
+}
+
+static struct cu_processing_job *cus_queue__try_dequeue(void)
+{
+ struct cu_processing_job *job, *dequeued_job = NULL;
+ struct list_head *pos, *tmp;
+
+ list_for_each_safe(pos, tmp, &cus_processing_queue.jobs) {
+ job = list_entry(pos, struct cu_processing_job, node);
+ if (job->type == JOB_STEAL && job->cu->id == cus_processing_queue.next_cu_id) {
+ dequeued_job = job;
+ break;
+ }
+ if (job->type == JOB_DECODE) {
+ /* all JOB_STEALs are added to the head, so no viable JOB_STEAL available */
+ dequeued_job = job;
+ break;
+ }
+ }
+ /* No jobs or only steals out of order */
+ if (!dequeued_job)
+ return NULL;
+
+ list_del(&dequeued_job->node);
+ return job;
+}
+
+static struct cu_processing_job *cus_queue__enqdeq_job(struct cu_processing_job *job)
+{
+ pthread_mutex_lock(&cus_processing_queue.mutex);
+ if (job) {
+ /* JOB_STEAL have higher priority, add them to the head so
+ * they can be found faster
+ */
+ if (job->type == JOB_STEAL)
+ list_add(&job->node, &cus_processing_queue.jobs);
+ else
+ list_add_tail(&job->node, &cus_processing_queue.jobs);
+ pthread_cond_signal(&cus_processing_queue.job_added);
+ }
+ for (;;) {
+ job = cus_queue__try_dequeue();
+ if (job)
+ break;
+ /* No jobs or only steals out of order */
+ pthread_cond_wait(&cus_processing_queue.job_added, &cus_processing_queue.mutex);
+ }
+ pthread_mutex_unlock(&cus_processing_queue.mutex);
+ return job;
+}
+
static struct dwarf_cu *dwarf_cus__create_cu(struct dwarf_cus *dcus, Dwarf_Die *cu_die, uint8_t pointer_size)
{
/*
@@ -3479,28 +3574,6 @@ static struct dwarf_cu *dwarf_cus__create_cu(struct dwarf_cus *dcus, Dwarf_Die *
return dcu;
}
-static int dwarf_cus__process_cu(struct dwarf_cus *dcus, Dwarf_Die *cu_die,
- struct cu *cu, void *thr_data)
-{
- if (die__process_and_recode(cu_die, cu, dcus->conf) != 0 ||
- cus__finalize(dcus->cus, cu, dcus->conf, thr_data) == LSK__STOP_LOADING)
- return DWARF_CB_ABORT;
-
- return DWARF_CB_OK;
-}
-
-static int dwarf_cus__create_and_process_cu(struct dwarf_cus *dcus, Dwarf_Die *cu_die, uint8_t pointer_size)
-{
- struct dwarf_cu *dcu = dwarf_cus__create_cu(dcus, cu_die, pointer_size);
-
- if (dcu == NULL)
- return DWARF_CB_ABORT;
-
- cus__add(dcus->cus, dcu->cu);
-
- return dwarf_cus__process_cu(dcus, cu_die, dcu->cu, NULL);
-}
-
static int dwarf_cus__nextcu(struct dwarf_cus *dcus, struct dwarf_cu **dcu,
Dwarf_Die *die_mem, Dwarf_Die **cu_die,
uint8_t *pointer_size, uint8_t *offset_size)
@@ -3541,24 +3614,76 @@ out_unlock:
return ret;
}
-static void *dwarf_cus__process_cu_thread(void *arg)
+static struct cu *dwarf_loader__decode_next_cu(struct dwarf_cus *dcus)
{
- struct dwarf_thread *dthr = arg;
- struct dwarf_cus *dcus = dthr->dcus;
uint8_t pointer_size, offset_size;
+ struct dwarf_cu *dcu = NULL;
Dwarf_Die die_mem, *cu_die;
- struct dwarf_cu *dcu;
+ int err;
- while (dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size) == 0) {
- if (cu_die == NULL)
+ err = dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size);
+
+ if (err < 0)
+ goto out_error;
+ else if (err == 1) /* no more CUs */
+ return NULL;
+
+ err = die__process_and_recode(cu_die, dcu->cu, dcus->conf);
+ if (err)
+ goto out_error;
+ if (cu_die == NULL)
+ return NULL;
+
+ cu__finalize(dcu->cu, dcus->cus, dcus->conf);
+
+ return dcu->cu;
+
+out_error:
+ dcus->error = err;
+ fprintf(stderr, "error decoding cu %s\n", dcu && dcu->cu ? dcu->cu->name : "");
+ return NULL;
+}
+
+static void *dwarf_loader__worker_thread(void *arg)
+{
+ struct cu_processing_job *job = NULL;
+ struct dwarf_cus *dcus = arg;
+ bool stop = false;
+ struct cu *cu;
+
+ while (!stop) {
+ job = cus_queue__enqdeq_job(job);
+ switch (job->type) {
+
+ case JOB_DECODE:
+ cu = dwarf_loader__decode_next_cu(dcus);
+
+ if (cu == NULL) {
+ free(job);
+ stop = true;
+ break;
+ }
+
+ job->type = JOB_STEAL;
+ job->cu = cu;
break;
- if (dwarf_cus__process_cu(dcus, cu_die, dcu->cu, dthr->data) == DWARF_CB_ABORT)
+ case JOB_STEAL:
+ if (cus__steal_now(dcus->cus, job->cu, dcus->conf) == LSK__STOP_LOADING)
+ goto out_abort;
+ cus_queue__inc_next_cu_id();
+ /* re-enqueue JOB_DECODE so that next CU is decoded from DWARF */
+ job->type = JOB_DECODE;
+ job->cu = NULL;
+ break;
+
+ default:
+ fprintf(stderr, "Unknown dwarf_loader job type %d\n", job->type);
goto out_abort;
+ }
}
- if (dcus->conf->thread_exit &&
- dcus->conf->thread_exit(dcus->conf, dthr->data) != 0)
+ if (dcus->error)
goto out_abort;
return (void *)DWARF_CB_OK;
@@ -3566,29 +3691,64 @@ out_abort:
return (void *)DWARF_CB_ABORT;
}
-static int dwarf_cus__threaded_process_cus(struct dwarf_cus *dcus)
+/*
+ * If workers pick up next cu for decoding as soon as they're ready,
+ * then the memory usage may greatly increase, if the stealer can't
+ * keep up with incoming work.
+ * If we want to avoid this there needs to be a limit on how many
+ * decoded, but not yet stolen, CUs we allow to hold in memory. When
+ * this limit is reached the workers will wait for more CUs to get
+ * stolen.
+ * The limit is enforced by the number of JOB_DECODE jobs we enqueue
+ * before the workers have started decoding. A job serves as a
+ * "ticket": worker can proceed with decoding only if it has a ticket.
+ *
+ * As for the value of this limit, it must be at least N, where N is
+ * the number of workers. If the limit < N, some workers will never
+ * work. Setting the limit higher, while allows for higher memory
+ * consumption, does not necessarily improves the total pahole
+ * runtime, likely due to increased concurrent memory allocation.
+ *
+ * Here are some data points that led to the chosen value:
+ *
+ * perf stat -e cpu-clock -r13 ... pahole -J -j$(nproc) ... vmlinux
+ * limit=N 1.58878 +- 0.00859 seconds time elapsed ( +- 0.54% )
+ * limit=Nx2 1.58532 +- 0.00405 seconds time elapsed ( +- 0.26% ) # best
+ * limit=Nx4 1.59415 +- 0.00413 seconds time elapsed ( +- 0.26% )
+ * limit=Nx8 1.62584 +- 0.00448 seconds time elapsed ( +- 0.28% )
+ * limit=Nx1024 1.92333 +- 0.00765 seconds time elapsed ( +- 0.40% )
+ */
+static inline int dwarf_loader__decoded_cus_limit(const struct conf_load *conf)
{
- pthread_t threads[dcus->conf->nr_jobs];
- struct dwarf_thread dthr[dcus->conf->nr_jobs];
- void *thread_data[dcus->conf->nr_jobs];
- int res;
+ return conf->nr_jobs > 0 ? conf->nr_jobs * 2 : 2;
+}
+
+static int dwarf_cus__process_cus(struct dwarf_cus *dcus)
+{
+ int nr_workers = dcus->conf->nr_jobs > 0 ? dcus->conf->nr_jobs : 1;
+ pthread_t workers[nr_workers];
+ struct cu_processing_job *job;
int i;
- if (dcus->conf->threads_prepare) {
- res = dcus->conf->threads_prepare(dcus->conf, dcus->conf->nr_jobs, thread_data);
- if (res != 0)
- return res;
- } else {
- memset(thread_data, 0, sizeof(void *) * dcus->conf->nr_jobs);
- }
+ cus_queue__init();
- for (i = 0; i < dcus->conf->nr_jobs; ++i) {
- dthr[i].dcus = dcus;
- dthr[i].data = thread_data[i];
+ /* Fill up the queue with JOB_DECODE jobs.
+ */
+ for (i = 0; i < dwarf_loader__decoded_cus_limit(dcus->conf); i++) {
+ job = calloc(1, sizeof(*job));
+ if (!job) {
+ dcus->error = -ENOMEM;
+ goto out_error;
+ }
+ job->type = JOB_DECODE;
+ /* no need for locks, workers were not started yet */
+ list_add(&job->node, &cus_processing_queue.jobs);
+ }
- dcus->error = pthread_create(&threads[i], NULL,
- dwarf_cus__process_cu_thread,
- &dthr[i]);
+ for (i = 0; i < nr_workers; ++i) {
+ dcus->error = pthread_create(&workers[i], NULL,
+ dwarf_loader__worker_thread,
+ dcus);
if (dcus->error)
goto out_join;
}
@@ -3598,52 +3758,18 @@ static int dwarf_cus__threaded_process_cus(struct dwarf_cus *dcus)
out_join:
while (--i >= 0) {
void *res;
- int err = pthread_join(threads[i], &res);
+ int err = pthread_join(workers[i], &res);
if (err == 0 && res != NULL)
dcus->error = (long)res;
}
- if (dcus->conf->threads_collect) {
- res = dcus->conf->threads_collect(dcus->conf, dcus->conf->nr_jobs,
- thread_data, dcus->error);
- if (dcus->error == 0)
- dcus->error = res;
- }
+out_error:
+ cus_queue__destroy();
return dcus->error;
}
-static int __dwarf_cus__process_cus(struct dwarf_cus *dcus)
-{
- uint8_t pointer_size, offset_size;
- Dwarf_Off noff;
- size_t cuhl;
-
- while (dwarf_nextcu(dcus->dw, dcus->off, &noff, &cuhl, NULL, &pointer_size, &offset_size) == 0) {
- Dwarf_Die die_mem;
- Dwarf_Die *cu_die = dwarf_offdie(dcus->dw, dcus->off + cuhl, &die_mem);
-
- if (cu_die == NULL)
- break;
-
- if (dwarf_cus__create_and_process_cu(dcus, cu_die, pointer_size) == DWARF_CB_ABORT)
- return DWARF_CB_ABORT;
-
- dcus->off = noff;
- }
-
- return 0;
-}
-
-static int dwarf_cus__process_cus(struct dwarf_cus *dcus)
-{
- if (dcus->conf->nr_jobs > 1)
- return dwarf_cus__threaded_process_cus(dcus);
-
- return __dwarf_cus__process_cus(dcus);
-}
-
static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
Dwfl_Module *mod, Dwarf *dw, Elf *elf,
const char *filename,
@@ -3733,7 +3859,8 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
if (cu__resolve_func_ret_types_optimized(cu) != LSK__KEEPIT)
goto out_abort;
- if (cus__finalize(cus, cu, conf, NULL) == LSK__STOP_LOADING)
+ cu__finalize(cu, cus, conf);
+ if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
goto out_abort;
return 0;
@@ -3765,7 +3892,8 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf,
}
if (type_cu != NULL) {
- type_lsk = cu__finalize(type_cu, cus, conf, NULL);
+ cu__finalize(type_cu, cus, conf);
+ type_lsk = cus__steal_now(cus, type_cu, conf);
if (type_lsk == LSK__DELETE) {
cus__remove(cus, type_cu);
}
@@ -3787,6 +3915,7 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf,
.type_dcu = type_cu ? &type_dcu : NULL,
.build_id = build_id,
.build_id_len = build_id_len,
+ .nr_cus_created = 0,
};
res = dwarf_cus__process_cus(&dcus);
}
@@ -530,48 +530,6 @@ void cus__unlock(struct cus *cus)
pthread_mutex_unlock(&cus->mutex);
}
-void cus__set_cu_state(struct cus *cus, struct cu *cu, enum cu_state state)
-{
- cus__lock(cus);
- cu->state = state;
- cus__unlock(cus);
-}
-
-// Used only when reproducible builds are desired
-struct cu *cus__get_next_processable_cu(struct cus *cus)
-{
- struct cu *cu;
-
- cus__lock(cus);
-
- list_for_each_entry(cu, &cus->cus, node) {
- switch (cu->state) {
- case CU__LOADED:
- cu->state = CU__PROCESSING;
- goto found;
- case CU__PROCESSING:
- // This will happen when we get to parallel
- // reproducible BTF encoding, libbpf dedup work needed
- // here. The other possibility is when we're flushing
- // the DWARF processed CUs when the parallel DWARF
- // loading stoped and we still have CUs to encode to
- // BTF because of ordering requirements.
- continue;
- case CU__UNPROCESSED:
- // The first entry isn't loaded, signal the
- // caller to return and try another day, as we
- // need to respect the original DWARF CU ordering.
- goto out;
- }
- }
-out:
- cu = NULL;
-found:
- cus__unlock(cus);
-
- return cu;
-}
-
bool cus__empty(const struct cus *cus)
{
return list_empty(&cus->cus);
@@ -808,8 +766,6 @@ struct cu *cu__new(const char *name, uint8_t addr_size,
cu->addr_size = addr_size;
cu->extra_dbg_info = 0;
- cu->state = CU__UNPROCESSED;
-
cu->nr_inline_expansions = 0;
cu->size_inline_expansions = 0;
cu->nr_structures_changed = 0;
@@ -44,12 +44,6 @@ enum load_steal_kind {
LSK__STOP_LOADING,
};
-enum cu_state {
- CU__UNPROCESSED,
- CU__LOADED,
- CU__PROCESSING,
-};
-
/*
* BTF combines all the types into one big CU using btf_dedup(), so for something
* like a allyesconfig vmlinux kernel we can get over 65535 types.
@@ -60,7 +54,6 @@ struct btf;
struct conf_fprintf;
/** struct conf_load - load configuration
- * @thread_exit - called at the end of a thread, 1st user: BTF encoder dedup
* @extra_dbg_info - keep original debugging format extra info
* (e.g. DWARF's decl_{line,file}, id, etc)
* @fixup_silly_bitfields - Fixup silly things such as "int foo:32;"
@@ -70,11 +63,9 @@ struct conf_fprintf;
* @skip_missing - skip missing types rather than bailing out.
*/
struct conf_load {
- enum load_steal_kind (*steal)(struct cu *cu,
- struct conf_load *conf,
- void *thr_data);
+ enum load_steal_kind (*steal)(struct cu *cu, struct conf_load *conf);
struct cu * (*early_cu_filter)(struct cu *cu);
- int (*thread_exit)(struct conf_load *conf, void *thr_data);
+ int (*pre_load_module)(Dwfl_Module *mod, Elf *elf);
void *cookie;
char *format_path;
int nr_jobs;
@@ -105,8 +96,6 @@ struct conf_load {
const char *kabi_prefix;
struct btf *base_btf;
struct conf_fprintf *conf_fprintf;
- int (*threads_prepare)(struct conf_load *conf, int nr_threads, void **thr_data);
- int (*threads_collect)(struct conf_load *conf, int nr_threads, void **thr_data, int error);
};
/** struct conf_fprintf - hints to the __fprintf routines
@@ -188,10 +177,6 @@ void cus__add(struct cus *cus, struct cu *cu);
void __cus__remove(struct cus *cus, struct cu *cu);
void cus__remove(struct cus *cus, struct cu *cu);
-struct cu *cus__get_next_processable_cu(struct cus *cus);
-
-void cus__set_cu_state(struct cus *cus, struct cu *cu, enum cu_state state);
-
void cus__print_error_msg(const char *progname, const struct cus *cus,
const char *filename, const int err);
struct cu *cus__find_pair(struct cus *cus, const char *name);
@@ -308,7 +293,6 @@ struct cu {
uint8_t nr_register_params;
int register_params[ARCH_MAX_REGISTER_PARAMS];
int functions_saved;
- enum cu_state state;
uint16_t language;
unsigned long nr_inline_expansions;
size_t size_inline_expansions;
@@ -3118,6 +3118,32 @@ out:
return ret;
}
+static enum load_steal_kind pahole_stealer__btf_encode(struct cu *cu, struct conf_load *conf_load)
+{
+ int err;
+
+ if (!btf_encoder)
+ btf_encoder = btf_encoder__new(cu,
+ detached_btf_filename,
+ conf_load->base_btf,
+ global_verbose,
+ conf_load);
+
+ if (!btf_encoder) {
+ fprintf(stderr, "Error creating BTF encoder.\n");
+ return LSK__STOP_LOADING;
+ }
+
+ err = btf_encoder__encode_cu(btf_encoder, cu, conf_load);
+ if (err < 0) {
+ fprintf(stderr, "Error while encoding BTF.\n");
+ return LSK__STOP_LOADING;
+ }
+
+ return LSK__DELETE;
+}
+
+
static struct type_instance *header;
static bool print_enumeration_with_enumerator(struct cu *cu, const char *name)
@@ -3136,87 +3162,7 @@ static bool print_enumeration_with_enumerator(struct cu *cu, const char *name)
return false;
}
-struct thread_data {
- struct btf *btf;
- struct btf_encoder *encoder;
-};
-
-static int pahole_threads_prepare_reproducible_build(struct conf_load *conf, int nr_threads, void **thr_data)
-{
- for (int i = 0; i < nr_threads; i++)
- thr_data[i] = NULL;
-
- return 0;
-}
-
-static int pahole_threads_prepare(struct conf_load *conf, int nr_threads, void **thr_data)
-{
- int i;
- struct thread_data *threads = calloc(sizeof(struct thread_data), nr_threads);
-
- for (i = 0; i < nr_threads; i++)
- thr_data[i] = threads + i;
-
- return 0;
-}
-
-static int pahole_thread_exit(struct conf_load *conf, void *thr_data)
-{
- struct thread_data *thread = thr_data;
-
- if (thread == NULL)
- return 0;
-
- /*
- * Here we will call btf__dedup() here once we extend
- * btf__dedup().
- */
-
- return 0;
-}
-
-static int pahole_threads_collect(struct conf_load *conf, int nr_threads, void **thr_data,
- int error)
-{
- struct thread_data **threads = (struct thread_data **)thr_data;
- int i;
- int err = 0;
-
- if (error)
- goto out;
-
- err = btf_encoder__add_saved_funcs(conf_load.skip_encoding_btf_inconsistent_proto);
- if (err < 0)
- goto out;
-
- for (i = 0; i < nr_threads; i++) {
- /*
- * Merge content of the btf instances of worker threads to the btf
- * instance of the primary btf_encoder.
- */
- if (!threads[i]->btf)
- continue;
- err = btf_encoder__add_encoder(btf_encoder, threads[i]->encoder);
- if (err < 0)
- goto out;
- }
- err = 0;
-
-out:
- for (i = 0; i < nr_threads; i++) {
- if (threads[i]->encoder && threads[i]->encoder != btf_encoder) {
- btf_encoder__delete(threads[i]->encoder);
- threads[i]->encoder = NULL;
- }
- }
- free(threads[0]);
-
- return err;
-}
-
-static enum load_steal_kind pahole_stealer(struct cu *cu,
- struct conf_load *conf_load,
- void *thr_data)
+static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf_load)
{
int ret = LSK__DELETE;
@@ -3238,94 +3184,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu,
return LSK__DELETE; // Maybe we can find this in several CUs, so don't stop it
if (btf_encode) {
- static pthread_mutex_t btf_lock = PTHREAD_MUTEX_INITIALIZER;
- struct btf_encoder *encoder;
-
- pthread_mutex_lock(&btf_lock);
- /*
- * FIXME:
- *
- * This should be really done at main(), but since in the current codebase only at this
- * point we'll have cu->elf setup...
- */
- if (!btf_encoder) {
- /*
- * btf_encoder is the primary encoder.
- * And, it is used by the thread
- * create it.
- */
- btf_encoder = btf_encoder__new(cu, detached_btf_filename, conf_load->base_btf,
- global_verbose, conf_load);
- if (btf_encoder && thr_data) {
- struct thread_data *thread = thr_data;
-
- thread->encoder = btf_encoder;
- thread->btf = btf_encoder__btf(btf_encoder);
- }
- }
-
- // Reproducible builds don't have multiple btf_encoders, so we need to keep the lock until we encode BTF for this CU.
- if (thr_data)
- pthread_mutex_unlock(&btf_lock);
-
- if (!btf_encoder) {
- ret = LSK__STOP_LOADING;
- goto out_btf;
- }
-
- /*
- * thr_data keeps per-thread data for worker threads. Each worker thread
- * has an encoder. The main thread will merge the data collected by all
- * these encoders to btf_encoder. However, the first thread reaching this
- * function creates btf_encoder and reuses it as its local encoder. It
- * avoids copying the data collected by the first thread.
- */
- if (thr_data) {
- struct thread_data *thread = thr_data;
-
- if (thread->encoder == NULL) {
- thread->encoder =
- btf_encoder__new(cu, detached_btf_filename,
- NULL,
- global_verbose,
- conf_load);
- thread->btf = btf_encoder__btf(thread->encoder);
- }
- encoder = thread->encoder;
- } else {
- encoder = btf_encoder;
- }
-
- // Since we don't have yet a way to parallelize the BTF encoding, we
- // need to ask the loader for the next CU that we can process, one
- // that is loaded and is in order, if the next one isn't yet loaded,
- // then return to let the DWARF loader thread to load the next one,
- // eventually all will get processed, even if when all DWARF loading
- // threads finish.
- if (conf_load->reproducible_build) {
- ret = LSK__KEEPIT; // we're not processing the cu passed to this
- // function, so keep it.
- cu = cus__get_next_processable_cu(cus);
- if (cu == NULL)
- goto out_btf;
- }
-
- ret = btf_encoder__encode_cu(encoder, cu, conf_load);
- if (ret < 0) {
- fprintf(stderr, "Encountered error while encoding BTF.\n");
- exit(1);
- }
-
- if (conf_load->reproducible_build) {
- ret = LSK__KEEPIT; // we're not processing the cu passed to this function, so keep it.
- // Kinda equivalent to LSK__DELETE since we processed this, but we can't delete it
- // as we stash references to entries in CUs for 'struct function' in btf_encoder__add_saved_funcs()
- // and btf_encoder__save_func(), so we can't delete them here. - Alan Maguire
- }
-out_btf:
- if (!thr_data) // See comment about reproducibe_build above
- pthread_mutex_unlock(&btf_lock);
- return ret;
+ return pahole_stealer__btf_encode(cu, conf_load);
}
#if 0
if (ctf_encode) {
@@ -3625,24 +3484,6 @@ out_free:
return ret;
}
-static int cus__flush_reproducible_build(struct cus *cus, struct btf_encoder *encoder, struct conf_load *conf_load)
-{
- int err = 0;
-
- while (true) {
- struct cu *cu = cus__get_next_processable_cu(cus);
-
- if (cu == NULL)
- break;
-
- err = btf_encoder__encode_cu(encoder, cu, conf_load);
- if (err < 0)
- break;
- }
-
- return err;
-}
-
int main(int argc, char *argv[])
{
int err, remaining, rc = EXIT_FAILURE;
@@ -3731,16 +3572,6 @@ int main(int argc, char *argv[])
if (languages.exclude)
conf_load.early_cu_filter = cu__filter;
- conf_load.thread_exit = pahole_thread_exit;
-
- if (conf_load.reproducible_build) {
- conf_load.threads_prepare = pahole_threads_prepare_reproducible_build;
- conf_load.threads_collect = NULL;
- } else {
- conf_load.threads_prepare = pahole_threads_prepare;
- conf_load.threads_collect = pahole_threads_collect;
- }
-
// Make 'pahole --header type < file' a shorter form of 'pahole -C type --count 1 < file'
if (conf.header_type && !class_name && prettify_input) {
conf.count = 1;
@@ -3840,12 +3671,6 @@ try_sole_arg_as_class_names:
header = NULL;
if (btf_encode && btf_encoder) { // maybe all CUs were filtered out and thus we don't have an encoder?
- if (conf_load.reproducible_build &&
- cus__flush_reproducible_build(cus, btf_encoder, &conf_load) < 0) {
- fprintf(stderr, "Encountered error while encoding BTF.\n");
- exit(1);
- }
-
err = btf_encoder__encode(btf_encoder, &conf_load);
btf_encoder__delete(btf_encoder);
if (err) {
@@ -91,8 +91,7 @@ static int cu__emit_tags(struct cu *cu)
}
static enum load_steal_kind pdwtags_stealer(struct cu *cu,
- struct conf_load *conf_load __maybe_unused,
- void *thr_data __maybe_unused)
+ struct conf_load *conf_load __maybe_unused)
{
cu__emit_tags(cu);
return LSK__DELETE;
@@ -510,8 +510,7 @@ int elf_symtabs__show(char *filenames[])
}
static enum load_steal_kind pfunct_stealer(struct cu *cu,
- struct conf_load *conf_load __maybe_unused,
- void *thr_data __maybe_unused)
+ struct conf_load *conf_load __maybe_unused)
{
if (function_name) {
@@ -37,10 +37,7 @@ for threads in $(seq $nr_proc) ; do
sleep 0.3s
# PID part to remove ps output headers
nr_threads_started=$(ps -L -C pahole | grep -v PID | wc -l)
-
- if [ $threads -gt 1 ] ; then
- ((nr_threads_started -= 1))
- fi
+ ((nr_threads_started -= 1)) # main thread doesn't count, it waits to join
if [ $threads != $nr_threads_started ] ; then
echo "ERROR: pahole asked to start $threads encoding threads, started $nr_threads_started"