@@ -352,6 +352,17 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
goto err_dec_pm;
}
+ if (IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch)) {
+ vdec_msg_queue_init_ctx(&dev->msg_queue_core_ctx, MTK_VDEC_CORE);
+ dev->core_workqueue = alloc_ordered_workqueue("core-decoder",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE);
+ if (!dev->core_workqueue) {
+ mtk_v4l2_err("Failed to create core workqueue");
+ ret = -EINVAL;
+ goto err_res;
+ }
+ }
+
for (i = 0; i < MTK_VDEC_HW_MAX; i++)
mutex_init(&dev->dec_mutex[i]);
spin_lock_init(&dev->irqlock);
@@ -362,7 +373,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
if (ret) {
mtk_v4l2_err("v4l2_device_register err=%d", ret);
- goto err_res;
+ goto err_core_workq;
}
init_waitqueue_head(&dev->queue);
@@ -459,6 +470,9 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
video_unregister_device(vfd_dec);
err_dec_alloc:
v4l2_device_unregister(&dev->v4l2_dev);
+err_core_workq:
+ if (IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch))
+ destroy_workqueue(dev->core_workqueue);
err_res:
mtk_vcodec_release_dec_pm(&dev->pm);
err_dec_pm:
@@ -27,6 +27,7 @@
#define MTK_VCODEC_MAX_PLANES 3
#define MTK_V4L2_BENCHMARK 0
#define WAIT_INTR_TIMEOUT_MS 1000
+#define IS_VDEC_LAT_ARCH(hw_arch) ((hw_arch) >= MTK_VDEC_LAT_SINGLE_CORE) /* non-zero when hw_arch is at or above MTK_VDEC_LAT_SINGLE_CORE; probe uses it to gate core workqueue setup */
/*
* enum mtk_hw_reg_idx - MTK hw register base index
@@ -467,6 +468,7 @@ struct mtk_vcodec_enc_pdata {
* @comp_dev: component hardware device
* @component_node: component node
*
+ * @core_workqueue: queue used for core hardware decode
* @msg_queue_core_ctx: msg queue context used for core thread
*
* @hardware_bitmap: used to record hardware is ready or not
@@ -511,6 +513,7 @@ struct mtk_vcodec_dev {
void *comp_dev[MTK_VDEC_HW_MAX];
struct device_node *component_node[MTK_VDEC_HW_MAX];
+ struct workqueue_struct *core_workqueue;
struct vdec_msg_queue_ctx msg_queue_core_ctx;
DECLARE_BITMAP(hardware_bitmap, MTK_VDEC_HW_MAX);
@@ -69,6 +69,9 @@ void vdec_msg_queue_qbuf(struct vdec_msg_queue_ctx *msg_ctx,
if (msg_ctx->hardware_index != MTK_VDEC_CORE)
wake_up_all(&msg_ctx->ready_to_use);
+ else
+ queue_work(buf->ctx->dev->core_workqueue,
+ &buf->ctx->msg_queue.core_work);
mtk_v4l2_debug(3, "enqueue buf type: %d addr: 0x%p num: %d",
msg_ctx->hardware_index, buf, msg_ctx->ready_num);
@@ -170,8 +173,7 @@ bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue)
return false;
}
-void vdec_msg_queue_deinit(
- struct vdec_msg_queue *msg_queue,
+void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue,
struct mtk_vcodec_ctx *ctx)
{
struct vdec_lat_buf *lat_buf;
@@ -197,10 +199,36 @@ void vdec_msg_queue_deinit(
}
}
-int vdec_msg_queue_init(
- struct vdec_msg_queue *msg_queue,
- struct mtk_vcodec_ctx *ctx,
- core_decode_cb_t core_decode,
+/**
+ * vdec_msg_queue_core_work() - work handler that runs one core decode pass
+ * @work: the core_work member embedded in a struct vdec_msg_queue
+ *
+ * Dequeues one lat_buf from the device-wide core message queue, invokes its
+ * core_decode callback, and queues the buffer back onto the lat_ctx of the
+ * context recorded in that buffer. Returns without doing anything when the
+ * core queue yields no buffer.
+ */
+static void vdec_msg_queue_core_work(struct work_struct *work)
+{
+	struct vdec_msg_queue *msg_queue =
+		container_of(work, struct vdec_msg_queue, core_work);
+	struct mtk_vcodec_ctx *ctx =
+		container_of(msg_queue, struct mtk_vcodec_ctx, msg_queue);
+	struct mtk_vcodec_dev *dev = ctx->dev;
+	struct vdec_lat_buf *lat_buf;
+
+	lat_buf = vdec_msg_queue_dqbuf(&dev->msg_queue_core_ctx);
+	if (!lat_buf)
+		return;
+
+	/* From here on, use the context stored in the dequeued buffer. */
+	ctx = lat_buf->ctx;
+	mtk_vcodec_set_curr_ctx(dev, ctx, MTK_VDEC_CORE);
+
+	lat_buf->core_decode(lat_buf);
+
+	/* Clear the current core context before handing the buffer back. */
+	mtk_vcodec_set_curr_ctx(dev, NULL, MTK_VDEC_CORE);
+	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
+
+	/*
+	 * NOTE(review): this tests the LAT ready queue, to which lat_buf was
+	 * just returned, rather than dev->msg_queue_core_ctx.ready_queue.
+	 * Confirm that this is the intended re-schedule condition.
+	 */
+	if (!list_empty(&ctx->msg_queue.lat_ctx.ready_queue)) {
+		mtk_v4l2_debug(3, "re-schedule to decode for core: %d",
+			       dev->msg_queue_core_ctx.ready_num);
+		queue_work(dev->core_workqueue, &msg_queue->core_work);
+	}
+}
+
+int vdec_msg_queue_init(struct vdec_msg_queue *msg_queue,
+ struct mtk_vcodec_ctx *ctx, core_decode_cb_t core_decode,
int private_size)
{
struct vdec_lat_buf *lat_buf;
@@ -211,6 +239,7 @@ int vdec_msg_queue_init(
return 0;
vdec_msg_queue_init_ctx(&msg_queue->lat_ctx, MTK_VDEC_LAT0);
+ INIT_WORK(&msg_queue->core_work, vdec_msg_queue_core_work);
msg_queue->wdma_addr.size = vde_msg_queue_get_trans_size(
ctx->picinfo.buf_w, ctx->picinfo.buf_h);
@@ -67,6 +67,7 @@ struct vdec_lat_buf {
* @wdma_addr: wdma address used for ube
* @wdma_rptr_addr: ube read point
* @wdma_wptr_addr: ube write point
+ * @core_work: core hardware work
* @lat_ctx: used to store lat buffer list
*/
struct vdec_msg_queue {
@@ -76,6 +77,7 @@ struct vdec_msg_queue {
uint64_t wdma_rptr_addr;
uint64_t wdma_wptr_addr;
+ struct work_struct core_work;
struct vdec_msg_queue_ctx lat_ctx;
};
@@ -86,10 +88,8 @@ struct vdec_msg_queue {
* @core_decode: core decode callback for each codec
* @private_size: the private data size used to share with core
*/
-int vdec_msg_queue_init(
- struct vdec_msg_queue *msg_queue,
- struct mtk_vcodec_ctx *ctx,
- core_decode_cb_t core_decode,
+int vdec_msg_queue_init(struct vdec_msg_queue *msg_queue,
+ struct mtk_vcodec_ctx *ctx, core_decode_cb_t core_decode,
int private_size);
/**
Add a work queue to process core hardware information. First, get a lat_buf from the message queue, then call the core hardware of each codec (H264/VP9/AV1) to decode, and finally put the lat_buf back into the message queue. Signed-off-by: Yunfei Dong <yunfei.dong@mediatek.com> --- v8: use a work queue for hardware decode instead of creating a thread. --- .../platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 16 +++++++- .../platform/mtk-vcodec/mtk_vcodec_drv.h | 3 ++ .../platform/mtk-vcodec/vdec_msg_queue.c | 41 ++++++++++++++++--- .../platform/mtk-vcodec/vdec_msg_queue.h | 8 ++-- 4 files changed, 57 insertions(+), 11 deletions(-)