Message ID | 1563786452-22188-4-git-send-email-amasule@codeaurora.org (mailing list archive) |
---|---|
State | Not Applicable, archived |
Delegated to: | Andy Gross |
Headers | show |
Series | media: venus: Update clock scaling and core selection | expand |
Hi, On 7/22/19 12:07 PM, Aniket Masule wrote: > Present core assignment is static. Introduced load balancing > across the cores. Load on earch core is calculated and core > with minimum load is assigned to given instance. > > Signed-off-by: Aniket Masule <amasule@codeaurora.org> > --- > drivers/media/platform/qcom/venus/helpers.c | 69 +++++++++++++++++++++++--- > drivers/media/platform/qcom/venus/helpers.h | 2 +- > drivers/media/platform/qcom/venus/hfi_helper.h | 1 + > drivers/media/platform/qcom/venus/hfi_parser.h | 5 ++ > drivers/media/platform/qcom/venus/vdec.c | 2 +- > drivers/media/platform/qcom/venus/venc.c | 2 +- > 6 files changed, 72 insertions(+), 9 deletions(-) > > diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c > index edf403d..3b6cbbf 100644 > --- a/drivers/media/platform/qcom/venus/helpers.c > +++ b/drivers/media/platform/qcom/venus/helpers.c > @@ -26,6 +26,7 @@ > #include "helpers.h" > #include "hfi_helper.h" > #include "hfi_venus_io.h" > +#include "hfi_parser.h" > > struct intbuf { > struct list_head list; > @@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst *inst) > return mbs * inst->fps; > } > > +static u32 load_per_core(struct venus_core *core, u32 core_id) > +{ > + struct venus_inst *inst = NULL; > + u32 mbs_per_sec = 0, load = 0; > + > + mutex_lock(&core->lock); > + list_for_each_entry(inst, &core->instances, list) { > + if (inst->clk_data.core_id != core_id) > + continue; > + > + mbs_per_sec = load_per_instance(inst); > + load += mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq; > + } > + mutex_unlock(&core->lock); > + > + return load; > +} > + > static u32 load_per_type(struct venus_core *core, u32 session_type) > { > struct venus_inst *inst = NULL; > @@ -505,6 +524,16 @@ static int load_scale_clocks(struct venus_inst *inst) > return scale_clocks(inst); > } > > +int set_core_usage(struct venus_inst *inst, u32 usage) > +{ > + const u32 ptype = 
HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE; > + struct hfi_videocores_usage_type cu; > + > + cu.video_core_enable_mask = usage; > + > + return hfi_session_set_property(inst, ptype, &cu); > +} > + > static void fill_buffer_desc(const struct venus_buffer *buf, > struct hfi_buffer_desc *bd, bool response) > { > @@ -808,19 +837,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode) > } > EXPORT_SYMBOL_GPL(venus_helper_set_work_mode); > > -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage) > +int venus_helper_set_core(struct venus_inst *inst) > { > - const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE; > - struct hfi_videocores_usage_type cu; > + struct venus_core *core = inst->core; > + u32 min_core_id = 0, core1_load = 0, core2_load = 0; > + unsigned long min_load, max_freq, cur_inst_load; > + u32 cores_max; > + int ret; > > if (!IS_V4(inst->core)) > return 0; > > - cu.video_core_enable_mask = usage; > + core1_load = load_per_core(core, VIDC_CORE_ID_1); > + core2_load = load_per_core(core, VIDC_CORE_ID_2); > + min_core_id = core1_load < core2_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2; > + min_load = min(core1_load, core2_load); > + cores_max = core_num_max(inst); > > - return hfi_session_set_property(inst, ptype, &cu); > + if (cores_max < VIDC_CORE_ID_2) { > + min_core_id = VIDC_CORE_ID_1; > + min_load = core1_load; > + } > + > + cur_inst_load = load_per_instance(inst) * > + inst->clk_data.codec_freq_data->vpp_freq; > + max_freq = core->res->freq_tbl[0].freq; > + > + if ((cur_inst_load + min_load) > max_freq) { > + dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n", > + cur_inst_load, max_freq); > + return -EINVAL; > + } > + > + ret = set_core_usage(inst, min_core_id); We have a problem here. 
Let's assume that we have only one running decoder session and the code above decides that it should be handled by core2, but core2 clocks presently are enabled only if there is an encoder session (see DT subnodes), thus we select core2 but without enabling core2 clocks and power domain. > + if (ret) > + return ret; > + > + inst->clk_data.core_id = min_core_id; > + > + return 0; > } > -EXPORT_SYMBOL_GPL(venus_helper_set_core_usage); > +EXPORT_SYMBOL_GPL(venus_helper_set_core); >
Hi, On 2019-07-29 15:16, Stanimir Varbanov wrote: > Hi, > > On 7/22/19 12:07 PM, Aniket Masule wrote: >> Present core assignment is static. Introduced load balancing >> across the cores. Load on earch core is calculated and core >> with minimum load is assigned to given instance. >> >> Signed-off-by: Aniket Masule <amasule@codeaurora.org> >> --- >> drivers/media/platform/qcom/venus/helpers.c | 69 >> +++++++++++++++++++++++--- >> drivers/media/platform/qcom/venus/helpers.h | 2 +- >> drivers/media/platform/qcom/venus/hfi_helper.h | 1 + >> drivers/media/platform/qcom/venus/hfi_parser.h | 5 ++ >> drivers/media/platform/qcom/venus/vdec.c | 2 +- >> drivers/media/platform/qcom/venus/venc.c | 2 +- >> 6 files changed, 72 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/media/platform/qcom/venus/helpers.c >> b/drivers/media/platform/qcom/venus/helpers.c >> index edf403d..3b6cbbf 100644 >> --- a/drivers/media/platform/qcom/venus/helpers.c >> +++ b/drivers/media/platform/qcom/venus/helpers.c >> @@ -26,6 +26,7 @@ >> #include "helpers.h" >> #include "hfi_helper.h" >> #include "hfi_venus_io.h" >> +#include "hfi_parser.h" >> >> struct intbuf { >> struct list_head list; >> @@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst >> *inst) >> return mbs * inst->fps; >> } >> >> +static u32 load_per_core(struct venus_core *core, u32 core_id) >> +{ >> + struct venus_inst *inst = NULL; >> + u32 mbs_per_sec = 0, load = 0; >> + >> + mutex_lock(&core->lock); >> + list_for_each_entry(inst, &core->instances, list) { >> + if (inst->clk_data.core_id != core_id) >> + continue; >> + >> + mbs_per_sec = load_per_instance(inst); >> + load += mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq; >> + } >> + mutex_unlock(&core->lock); >> + >> + return load; >> +} >> + >> static u32 load_per_type(struct venus_core *core, u32 session_type) >> { >> struct venus_inst *inst = NULL; >> @@ -505,6 +524,16 @@ static int load_scale_clocks(struct venus_inst >> *inst) >> return 
scale_clocks(inst); >> } >> >> +int set_core_usage(struct venus_inst *inst, u32 usage) >> +{ >> + const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE; >> + struct hfi_videocores_usage_type cu; >> + >> + cu.video_core_enable_mask = usage; >> + >> + return hfi_session_set_property(inst, ptype, &cu); >> +} >> + >> static void fill_buffer_desc(const struct venus_buffer *buf, >> struct hfi_buffer_desc *bd, bool response) >> { >> @@ -808,19 +837,47 @@ int venus_helper_set_work_mode(struct venus_inst >> *inst, u32 mode) >> } >> EXPORT_SYMBOL_GPL(venus_helper_set_work_mode); >> >> -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage) >> +int venus_helper_set_core(struct venus_inst *inst) >> { >> - const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE; >> - struct hfi_videocores_usage_type cu; >> + struct venus_core *core = inst->core; >> + u32 min_core_id = 0, core1_load = 0, core2_load = 0; >> + unsigned long min_load, max_freq, cur_inst_load; >> + u32 cores_max; >> + int ret; >> >> if (!IS_V4(inst->core)) >> return 0; >> >> - cu.video_core_enable_mask = usage; >> + core1_load = load_per_core(core, VIDC_CORE_ID_1); >> + core2_load = load_per_core(core, VIDC_CORE_ID_2); >> + min_core_id = core1_load < core2_load ? VIDC_CORE_ID_1 : >> VIDC_CORE_ID_2; >> + min_load = min(core1_load, core2_load); >> + cores_max = core_num_max(inst); >> >> - return hfi_session_set_property(inst, ptype, &cu); >> + if (cores_max < VIDC_CORE_ID_2) { >> + min_core_id = VIDC_CORE_ID_1; >> + min_load = core1_load; >> + } >> + >> + cur_inst_load = load_per_instance(inst) * >> + inst->clk_data.codec_freq_data->vpp_freq; >> + max_freq = core->res->freq_tbl[0].freq; >> + >> + if ((cur_inst_load + min_load) > max_freq) { >> + dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n", >> + cur_inst_load, max_freq); >> + return -EINVAL; >> + } >> + >> + ret = set_core_usage(inst, min_core_id); > > We have a problem here. 
Let's assume that we have only one running > decoder session and the code above decides that it should be handled by > core2, but core2 clocks presently are enabled only if there is an > encoder session (see DT subnodes), thus we select core2 but without > enabling core2 clocks and power domain. > I will send a separate patch for core selection, once the power domain issue is fixed. >> + if (ret) >> + return ret; >> + >> + inst->clk_data.core_id = min_core_id; >> + >> + return 0; >> } >> -EXPORT_SYMBOL_GPL(venus_helper_set_core_usage); >> +EXPORT_SYMBOL_GPL(venus_helper_set_core); >> Regards, Aniket
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c index edf403d..3b6cbbf 100644 --- a/drivers/media/platform/qcom/venus/helpers.c +++ b/drivers/media/platform/qcom/venus/helpers.c @@ -26,6 +26,7 @@ #include "helpers.h" #include "hfi_helper.h" #include "hfi_venus_io.h" +#include "hfi_parser.h" struct intbuf { struct list_head list; @@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst *inst) return mbs * inst->fps; } +static u32 load_per_core(struct venus_core *core, u32 core_id) +{ + struct venus_inst *inst = NULL; + u32 mbs_per_sec = 0, load = 0; + + mutex_lock(&core->lock); + list_for_each_entry(inst, &core->instances, list) { + if (inst->clk_data.core_id != core_id) + continue; + + mbs_per_sec = load_per_instance(inst); + load += mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq; + } + mutex_unlock(&core->lock); + + return load; +} + static u32 load_per_type(struct venus_core *core, u32 session_type) { struct venus_inst *inst = NULL; @@ -505,6 +524,16 @@ static int load_scale_clocks(struct venus_inst *inst) return scale_clocks(inst); } +int set_core_usage(struct venus_inst *inst, u32 usage) +{ + const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE; + struct hfi_videocores_usage_type cu; + + cu.video_core_enable_mask = usage; + + return hfi_session_set_property(inst, ptype, &cu); +} + static void fill_buffer_desc(const struct venus_buffer *buf, struct hfi_buffer_desc *bd, bool response) { @@ -808,19 +837,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode) } EXPORT_SYMBOL_GPL(venus_helper_set_work_mode); -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage) +int venus_helper_set_core(struct venus_inst *inst) { - const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE; - struct hfi_videocores_usage_type cu; + struct venus_core *core = inst->core; + u32 min_core_id = 0, core1_load = 0, core2_load = 0; + unsigned long min_load, max_freq, cur_inst_load; + u32 
cores_max; + int ret; if (!IS_V4(inst->core)) return 0; - cu.video_core_enable_mask = usage; + core1_load = load_per_core(core, VIDC_CORE_ID_1); + core2_load = load_per_core(core, VIDC_CORE_ID_2); + min_core_id = core1_load < core2_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2; + min_load = min(core1_load, core2_load); + cores_max = core_num_max(inst); - return hfi_session_set_property(inst, ptype, &cu); + if (cores_max < VIDC_CORE_ID_2) { + min_core_id = VIDC_CORE_ID_1; + min_load = core1_load; + } + + cur_inst_load = load_per_instance(inst) * + inst->clk_data.codec_freq_data->vpp_freq; + max_freq = core->res->freq_tbl[0].freq; + + if ((cur_inst_load + min_load) > max_freq) { + dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n", + cur_inst_load, max_freq); + return -EINVAL; + } + + ret = set_core_usage(inst, min_core_id); + if (ret) + return ret; + + inst->clk_data.core_id = min_core_id; + + return 0; } -EXPORT_SYMBOL_GPL(venus_helper_set_core_usage); +EXPORT_SYMBOL_GPL(venus_helper_set_core); int venus_helper_init_codec_freq_data(struct venus_inst *inst) { diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h index 2c13245..1034111 100644 --- a/drivers/media/platform/qcom/venus/helpers.h +++ b/drivers/media/platform/qcom/venus/helpers.h @@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst *inst, u32 buftype); int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode); int venus_helper_init_codec_freq_data(struct venus_inst *inst); -int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage); +int venus_helper_set_core(struct venus_inst *inst); int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs, unsigned int output_bufs, unsigned int output2_bufs); diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h b/drivers/media/platform/qcom/venus/hfi_helper.h index 34ea503..f3d1018 100644 --- a/drivers/media/platform/qcom/venus/hfi_helper.h +++ 
b/drivers/media/platform/qcom/venus/hfi_helper.h @@ -559,6 +559,7 @@ struct hfi_bitrate { #define HFI_CAPABILITY_LCU_SIZE 0x14 #define HFI_CAPABILITY_HIER_P_HYBRID_NUM_ENH_LAYERS 0x15 #define HFI_CAPABILITY_MBS_PER_SECOND_POWERSAVE 0x16 +#define HFI_CAPABILITY_MAX_VIDEOCORES 0x2B struct hfi_capability { u32 capability_type; diff --git a/drivers/media/platform/qcom/venus/hfi_parser.h b/drivers/media/platform/qcom/venus/hfi_parser.h index 3e931c7..264e6dd 100644 --- a/drivers/media/platform/qcom/venus/hfi_parser.h +++ b/drivers/media/platform/qcom/venus/hfi_parser.h @@ -107,4 +107,9 @@ static inline u32 frate_step(struct venus_inst *inst) return cap_step(inst, HFI_CAPABILITY_FRAMERATE); } +static inline u32 core_num_max(struct venus_inst *inst) +{ + return cap_max(inst, HFI_CAPABILITY_MAX_VIDEOCORES); +} + #endif diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c index d037f80..620e060 100644 --- a/drivers/media/platform/qcom/venus/vdec.c +++ b/drivers/media/platform/qcom/venus/vdec.c @@ -551,7 +551,7 @@ static int vdec_output_conf(struct venus_inst *inst) if (ret) return ret; - ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1); + ret = venus_helper_set_core(inst); if (ret) return ret; diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c index cdddc82..28e76cc 100644 --- a/drivers/media/platform/qcom/venus/venc.c +++ b/drivers/media/platform/qcom/venus/venc.c @@ -660,7 +660,7 @@ static int venc_set_properties(struct venus_inst *inst) if (ret) return ret; - ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_2); + ret = venus_helper_set_core(inst); if (ret) return ret;
Present core assignment is static. Introduced load balancing across the cores. Load on each core is calculated and the core with minimum load is assigned to the given instance. Signed-off-by: Aniket Masule <amasule@codeaurora.org> --- drivers/media/platform/qcom/venus/helpers.c | 69 +++++++++++++++++++++++--- drivers/media/platform/qcom/venus/helpers.h | 2 +- drivers/media/platform/qcom/venus/hfi_helper.h | 1 + drivers/media/platform/qcom/venus/hfi_parser.h | 5 ++ drivers/media/platform/qcom/venus/vdec.c | 2 +- drivers/media/platform/qcom/venus/venc.c | 2 +- 6 files changed, 72 insertions(+), 9 deletions(-)